In a first step the financial data of Chinese firms was downloaded from Bloomberg and loaded into R. It includes all firms with China as their country of domicile that were part of the following GICS sub-industries: IT Consulting & Other Services (GICS 45102010), Data Processing & Outsourced Services (GICS 45102020), Internet Services & Infrastructure (GICS 45102030), Application Software (GICS 45103010), Systems Software (GICS 45103020), Communications Equipment (GICS 45201020), Technology Hardware, Storage & Peripherals (GICS 45202030), Electronic Equipment & Instruments (GICS 45203010), Electronic Components (GICS 45203015), Electronic Manufacturing Services (GICS 45203020), Technology Distributors (GICS 45203030), Semiconductor Equipment (GICS 45301010), Semiconductors (GICS 45301020), Internet & Direct Marketing Retail (GICS 25502020), Interactive Media & Services (GICS 50203010) and Movies & Entertainment (GICS 50202010).
The following information for 16 calendar quarters (Q1 2019 – Q4 2022) was collected: (1) GICS codes at the sub-industry level, (2) average market cap, (3) revenue, (4) profits as measured by earnings before interest and taxes (EBIT), (5) currency and (6) the financial market ticker as a unique identifier.
The data sets were uploaded piece by piece due to size limits and then joined on the ticker as the unique identifier.
### importing datasets
# Every Bloomberg export is read from its own Excel workbook. The quarterly
# series arrive in two files each ("qrt16,9" and "qrt8,1").
setwd("~/Desktop/Masterarbeit/Data/R_Master")

# currency + GICS classification
GICS_Cur_Exc <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/GICS_Currency_Exchange_onlyfirms.xlsx"
)

# market cap
MC1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/MarketCAP_qrt16,9_01012023_onlyfirms.xlsx"
)
MC2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/MarketCAP_qrt8,1_01012023_onlyfirms.xlsx"
)

# revenue
Rev1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Revenue_qrt16,9_01012023_onlyfirms.xlsx"
)
Rev2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Revenue_qrt8,1_01012023_onlyfirms.xlsx"
)

# IBIT files (the columns are later selected with the prefix "EBIT:")
IBIT1920 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/IBIT_qrt16,9_01012023_onlyfirms.xlsx"
)
IBIT2122 <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/IBIT_qrt8,1_01012023_onlyfirms.xlsx"
)
### joining data sets one at a time
# Each step full-joins one more workbook on the shared "Ticker" column, so a
# firm missing from any single export is kept (with NAs for that export).
df2 <- GICS_Cur_Exc %>% full_join(MC1920, by = "Ticker")
df3 <- df2 %>% full_join(MC2122, by = "Ticker")
df4 <- df3 %>% full_join(Rev1920, by = "Ticker")
df5 <- df4 %>% full_join(Rev2122, by = "Ticker")
df6 <- df5 %>% full_join(IBIT1920, by = "Ticker")
dffull<- full_join(df6, IBIT2122, by = "Ticker")
In a second step the spot exchange rates were added and all values were converted to USD. To achieve that, the data set was first filtered to remove firms without a value for currency (2 rows were excluded that lacked not only the currency but almost all necessary data, including names (688496 CH Equity, 301379)). Attached below is an overview of the missing financial information in the data set and the variable names used in the analysis.
# Keep only firms that report a currency
dffull <- dffull %>% filter(!is.na(Curncy))

# Wide -> long: every "<measure>:<quarter>" column becomes one row per firm
# and quarter; the measures are then spread back into one column each.
measures_long <- pivot_longer(
  dffull,
  cols = starts_with("Market Cap:") | starts_with("Revenue:") | starts_with("EBIT:"),
  names_to = c("Variable", "Quarter"),
  names_sep = ":"
)
df_long <- pivot_wider(
  measures_long,
  names_from = "Variable",
  values_from = "value"
)

##### attach the official exchange rates of the matching quarter
exch_rate <- read_excel(
  path = "~/Desktop/Masterarbeit/Data/Actual_Data/Leonard_Baum/Data/Calender_Quarter/Exchange_rates_formated.xlsx"
)
dflong_1 <- df_long %>% left_join(exch_rate, by = "Quarter")

### give the market-cap column a syntactic name: "Market Cap" -> Market_Cap
dflong_1 <- dflong_1 %>% dplyr::rename(Market_Cap = "Market Cap")
### calculating USD values for Market_CAP, Revenue and EBIT
# One conversion factor per row, chosen by the firm's reporting currency:
# 1 for USD, the matching Exch_* column for CNY/HKD/TWD/SGD/AUD, and NA for
# any other currency (so unsupported currencies yield NA in all Adj_* columns).
# The same factor is then applied to all three measures, replacing three
# copies of a six-level nested if_else() ladder.
usd_rate <- with(dflong_1, case_when(
  Curncy == "USD" ~ 1,
  Curncy == "CNY" ~ Exch_CNY,
  Curncy == "HKD" ~ Exch_HKD,
  Curncy == "TWD" ~ Exch_TWD,
  Curncy == "SGD" ~ Exch_SGD,
  Curncy == "AUD" ~ Exch_AUD,
  TRUE ~ NA_real_
))

# The rate is kept outside the data frame on purpose: no helper column is
# added, so the Adj_* columns stay the last three columns of df_adj.
df_adj <- dflong_1 %>%
  mutate(
    Adj_Market_Cap = Market_Cap * usd_rate,
    Adj_Revenue = Revenue * usd_rate,
    Adj_EBIT = EBIT * usd_rate
  )
# re-transform the relevant data back to a wide format
# Columns are picked by name rather than by position (previously
# select(1:3, 6, 15:17)), so a change in upstream column order cannot silently
# select the wrong variables; all_of() errors if a name is missing.
# NOTE(review): the names are inferred from the later names(df_wide) listing
# (Ticker, Name, GICS_SubInd) - confirm they match positions 1:3 and 6.
keep_cols <- c(
  "Ticker", "Name", "GICS SubInd", "Quarter",
  "Adj_Market_Cap", "Adj_Revenue", "Adj_EBIT"
)
df_sel <- df_adj %>% select(all_of(keep_cols))

# One column per measure and quarter, e.g. Adj_Revenue_<Quarter>
df_wide <- df_sel %>%
  pivot_wider(
    names_from = Quarter,
    values_from = c(Adj_Market_Cap, Adj_Revenue, Adj_EBIT)
  )
# renaming the variables in preparation for the data analysis
# get list of variable names
var_names <- names(df_wide)

# Number of quarters in the panel; the quarter label embedded in each column
# name is mirrored around it (label 16 -> Q1, ..., label 1 -> Q16).
n_quarters <- 16

# old column prefix -> new column prefix
new_prefix <- c(
  Adj_Market_Cap_Q = "MC_Q",
  Adj_Revenue_Q = "Rev_Q",
  Adj_EBIT_Q = "EBIT_Q"
)

renamed <- var_names
for (old_prefix in names(new_prefix)) {
  hits <- grepl(old_prefix, var_names, fixed = TRUE)
  # every digit in the old name is read as the quarter label
  label <- as.numeric(gsub("[^0-9]", "", var_names[hits]))
  renamed[hits] <- paste0(new_prefix[[old_prefix]], abs(label - n_quarters) + 1)
}
names(df_wide) <- renamed
# Give the sub-industry column a syntactic name and store the GICS code as
# text, so it is handled as a label rather than a number.
df_wide <- df_wide %>%
  dplyr::rename(GICS_SubInd = "GICS SubInd") %>%
  mutate(GICS_SubInd = as.character(GICS_SubInd))
# check new variable names
names(df_wide) [1] "Ticker" "Name" "GICS_SubInd" "MC_Q1" "MC_Q2"
[6] "MC_Q3" "MC_Q4" "MC_Q5" "MC_Q6" "MC_Q7"
[11] "MC_Q8" "MC_Q9" "MC_Q10" "MC_Q11" "MC_Q12"
[16] "MC_Q13" "MC_Q14" "MC_Q15" "MC_Q16" "Rev_Q1"
[21] "Rev_Q2" "Rev_Q3" "Rev_Q4" "Rev_Q5" "Rev_Q6"
[26] "Rev_Q7" "Rev_Q8" "Rev_Q9" "Rev_Q10" "Rev_Q11"
[31] "Rev_Q12" "Rev_Q13" "Rev_Q14" "Rev_Q15" "Rev_Q16"
[36] "EBIT_Q1" "EBIT_Q2" "EBIT_Q3" "EBIT_Q4" "EBIT_Q5"
[41] "EBIT_Q6" "EBIT_Q7" "EBIT_Q8" "EBIT_Q9" "EBIT_Q10"
[46] "EBIT_Q11" "EBIT_Q12" "EBIT_Q13" "EBIT_Q14" "EBIT_Q15"
[51] "EBIT_Q16"
# number of missing values in every column of df_wide
na_counts <- df_wide %>%
  is.na() %>%
  colSums()
na_counts Ticker Name GICS_SubInd MC_Q1 MC_Q2 MC_Q3
0 0 0 445 434 423
MC_Q4 MC_Q5 MC_Q6 MC_Q7 MC_Q8 MC_Q9
396 365 347 321 264 239
MC_Q10 MC_Q11 MC_Q12 MC_Q13 MC_Q14 MC_Q15
206 177 158 131 105 79
MC_Q16 Rev_Q1 Rev_Q2 Rev_Q3 Rev_Q4 Rev_Q5
32 448 442 394 388 320
Rev_Q6 Rev_Q7 Rev_Q8 Rev_Q9 Rev_Q10 Rev_Q11
318 259 259 235 224 176
Rev_Q12 Rev_Q13 Rev_Q14 Rev_Q15 Rev_Q16 EBIT_Q1
168 165 168 130 119 454
EBIT_Q2 EBIT_Q3 EBIT_Q4 EBIT_Q5 EBIT_Q6 EBIT_Q7
445 402 394 327 319 265
EBIT_Q8 EBIT_Q9 EBIT_Q10 EBIT_Q11 EBIT_Q12 EBIT_Q13
264 243 233 181 175 172
EBIT_Q14 EBIT_Q15 EBIT_Q16
170 138 128
In the next step the four concentration measures – HHI Market Cap, CR4 Market Cap, HHI Revenue and CR4 Revenue – were calculated for the 16 GICS sub-industries, with the number of firms ranging from 8 to 177 per market. The graphs below provide a graphical illustration of the development of the different market concentration measures – HHI Revenue, CR4 Revenue, HHI Market Cap, CR4 Market Cap – over the 16 quarters. The dotted line perpendicular to quarter 8 represents the cutoff point that delineates the time before and after the new regulatory approach took effect. For the two revenue-based concentration measures it is difficult to detect any pattern around the cutoff. This is different for the concentration measures based on market capitalization. The CR4 MC graph shows a general decrease in market concentration during the treatment period, with the notable exception of the most concentrated markets. Meanwhile, the more comprehensive HHI MC measure shows a sharp decline shortly after the cutoff for the two most concentrated markets (GICS 50203010: Interactive Media & Services; GICS 25502020: Internet & Direct Marketing Retail), while no substantial changes can be observed for the large number of markets with low concentration.
### calculating HHI
# n_distinct(df_wide$GICS_SubInd)
###we have 16 different GICS subindustries
table(df_wide$GICS_SubInd)
25502020 45102010 45102020 45102030 45103010 45103020 45201020 45202030
35 86 8 22 129 39 114 48
45203010 45203015 45203020 45203030 45301010 45301020 50202010 50203010
177 175 16 15 47 144 47 40
#with the amount of firms ranging from 8 to 177 per sub-industry.
# create a list of unique GICS subindustries
subindustries <- unique(df_wide$GICS_SubInd)

# Herfindahl-Hirschman index of one market: the sum of squared shares.
# `values` is a numeric vector with one entry per firm; missing entries are
# left out of both the market total and the sum.
hhi_from_values <- function(values) {
  shares <- values / sum(values, na.rm = TRUE)
  sum(shares^2, na.rm = TRUE)
}

# Concentration ratio of one market: combined share of the `top_n` largest
# firms (fewer if the market has fewer firms with data).
cr4_from_values <- function(values, top_n = 4) {
  shares <- values / sum(values, na.rm = TRUE)
  # sort() drops NA/NaN, so firms without data never enter the top group
  sum(head(sort(shares, decreasing = TRUE), top_n))
}

# Adds one column per quarter, named <out_prefix><q>, that holds `measure`
# evaluated on column <value_prefix><q> within each sub-industry. The value is
# repeated on every firm row of that sub-industry. Returns the extended data.
#
# Columns are pulled out with [[ ]] as plain vectors. The earlier
# `subset_data[, 2]` returned a one-column tibble, which made the shares a
# data-frame column and passed a data frame to order().
add_concentration <- function(data, groups, value_prefix, out_prefix,
                              measure, n_quarters = 16) {
  for (q in seq_len(n_quarters)) {
    values <- data[[paste0(value_prefix, q)]]
    result <- rep(NA_real_, nrow(data))
    for (grp in groups) {
      in_group <- data$GICS_SubInd == grp
      result[in_group] <- measure(values[in_group])
    }
    data[[paste0(out_prefix, q)]] <- result
  }
  data
}

# HHI and CR4 based on revenue
df_wide <- add_concentration(df_wide, subindustries, "Rev_Q", "HHIRev_SubInd_Q", hhi_from_values)
df_wide <- add_concentration(df_wide, subindustries, "Rev_Q", "CR4Rev_Subind_Q", cr4_from_values)

# HHI and CR4 based on market capitalisation
df_wide <- add_concentration(df_wide, subindustries, "MC_Q", "HHIMC_SubInd_Q", hhi_from_values)
df_wide <- add_concentration(df_wide, subindustries, "MC_Q", "CR4MC_Subind_Q", cr4_from_values)
# Build one long data frame per concentration measure, then plot it.
# Step 1: HHI (revenue) per quarter and sub-industry
hhi_rev_prefix <- "HHIRev_SubInd_Q"

# The index is repeated on every firm row, so keep the first row of each
# sub-industry only.
df_HHI <- df_wide %>%
  select(GICS_SubInd, starts_with(hhi_rev_prefix)) %>%
  distinct(GICS_SubInd, .keep_all = TRUE)

# long format: one row per sub-industry and quarter
df_HHI_Rev <- melt(
  df_HHI,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHI"
)

# strip the column prefix so Quarter is the plain quarter number
df_HHI_Rev$Quarter <- as.numeric(
  sub(hhi_rev_prefix, "", df_HHI_Rev$Quarter, fixed = TRUE)
)

# one line per sub-industry, dotted vertical marker at quarter 8
ggplot(
  df_HHI_Rev,
  aes(x = Quarter, y = HHI, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(title = "HHI Revenue", x = "Quarter", y = "HHI", color = "GICS Subindustry") +
  theme_minimal()
# some encouraging, albeit small, drop-offs are visible for several GICS
# sub-industries at Q8, the intended cut-off point for the RDD
### Step 2: CR4 Revenue
# The CR4 columns are created as "CR4Rev_Subind_Q<q>" (lower-case "ind").
# The selection used "CR4Rev_SubInd_Q", which matched only because
# starts_with() ignores case by default; the exact prefix is now used for
# both the selection and the stripping.
cr4_rev_prefix <- "CR4Rev_Subind_Q"

# select the CR4 (revenue) columns and keep each sub-industry once
df_CR4 <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_rev_prefix))
df_CR4 <- df_CR4[!duplicated(df_CR4[, c("GICS_SubInd")]), ]

# long format: one row per sub-industry and quarter
df_CR4 <- melt(df_CR4, id.vars = "GICS_SubInd", variable.name = "Quarter", value.name = "CR4")

# strip the column prefix so Quarter is the plain quarter number
df_CR4$Quarter <- as.numeric(sub(cr4_rev_prefix, "", df_CR4$Quarter, fixed = TRUE))

# one line per sub-industry, dotted vertical marker at quarter 8
ggplot(df_CR4, aes(x = Quarter, y = CR4, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "CR4", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Revenue") +
  theme_minimal()
### Step 3 HHI MC
# HHI (market cap) per quarter and sub-industry
hhi_mc_prefix <- "HHIMC_SubInd_Q"

# The index is repeated on every firm row, so keep the first row of each
# sub-industry only.
df_HHIMC <- df_wide %>%
  select(GICS_SubInd, starts_with(hhi_mc_prefix)) %>%
  distinct(GICS_SubInd, .keep_all = TRUE)

# long format: one row per sub-industry and quarter
df_HHIMC <- melt(
  df_HHIMC,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHIMC"
)

# strip the column prefix so Quarter is the plain quarter number
df_HHIMC$Quarter <- as.numeric(
  sub(hhi_mc_prefix, "", df_HHIMC$Quarter, fixed = TRUE)
)

# one line per sub-industry, dotted vertical marker at quarter 8
ggplot(
  df_HHIMC,
  aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(title = "HHI Market Cap", x = "Quarter", y = "HHIMC", color = "GICS Subindustry") +
  theme_minimal()
## the two most concentrated markets show sharp drop-offs after the change in
## regulatory approach
# Step 4: CR4 Market Cap
# Create a new data frame with CR4 (Market Cap) values per quarter per
# sub-industry.
# The columns are created as "CR4MC_Subind_Q<q>" (lower-case "ind"). The
# selection used "CR4MC_SubInd_Q", which matched only because starts_with()
# ignores case by default; the exact prefix is now used throughout.
cr4_mc_prefix <- "CR4MC_Subind_Q"

# select the CR4 (market cap) columns and keep each sub-industry once
df_CR4MC <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_mc_prefix))
df_CR4MC <- df_CR4MC[!duplicated(df_CR4MC[, c("GICS_SubInd")]), ]

# long format: one row per sub-industry and quarter
df_CR4MC <- melt(df_CR4MC, id.vars = "GICS_SubInd", variable.name = "Quarter", value.name = "CR4MC")

# strip the column prefix so Quarter is the plain quarter number
df_CR4MC$Quarter <- as.numeric(sub(cr4_mc_prefix, "", df_CR4MC$Quarter, fixed = TRUE))
# Plot the data using ggplot2
ggplot(df_CR4MC, aes(x = Quarter, y = CR4MC, group = GICS_SubInd, color = GICS_SubInd)) +
geom_line() +
labs(x = "Quarter", y = "CR4MC", color = "GICS Subindustry") +
geom_vline(xintercept = 8, linetype = "dotted") +
ggtitle("CR4 Market Cap") +
theme_minimal()
The first method of testing hypothesis 1 was a Regression Discontinuity Design (RDD). However, this did not produce significant results. Both revenue-based concentration measures show a small negative treatment effect (HHI Rev -0.005 and CR4 Rev -0.037) that is not significant (p value HHI Rev = 0.968, p value CR4 Rev = 0.810). For the MC concentration measures the regression discontinuity also shows a small negative treatment effect (HHI MC -0.009 and CR4 MC -0.008) with very high p values (p value HHI MC = 0.955, p value CR4 MC = 0.957). This can be attributed to the low number of observations at the market level: 16 GICS sub-industries with 256 observation points in total are in the lower range of acceptable data quantity for RDDs.
### Testing Hypothesis 1: Change in regulatory approach has led to reduced market concentration.
# Method 1: regression discontinuity design (RDD)
# First outcome: HHI (revenue)

# treatment indicator: 1 from quarter 9 onwards, 0 before
df_HHI_Rev$treatment <- as.numeric(df_HHI_Rev$Quarter >= 9)

# running, treatment and outcome variables
run_var <- df_HHI_Rev$Quarter
treat_var <- df_HHI_Rev$treatment
out_var <- df_HHI_Rev$HHI

# cutoff on the running variable
# NOTE(review): the printed sample split (112 | 144 observations) shows that
# with c = 8 quarter 8 falls on the treated side, whereas `treatment` above
# starts at quarter 9 - confirm which quarter is the first treated one.
cutoff <- 8

# local-linear sharp RD, triangular kernel, MSE-optimal bandwidth
rddHHI_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)
summary(rddHHI_results)Sharp RD estimates using local polynomial regression.
Number of Obs. 256
BW type mserd
Kernel Triangular
VCE method NN
Number of Obs. 112 144
Eff. Number of Obs. 32 48
Order est. (p) 1 1
Order bias (q) 2 2
BW est. (h) 2.654 2.654
BW bias (b) 4.608 4.608
rho (h/b) 0.576 0.576
Unique Obs. 7 9
=============================================================================
Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
=============================================================================
Conventional -0.005 0.127 -0.040 0.968 [-0.254 , 0.243]
Robust - - -0.055 0.956 [-0.353 , 0.333]
=============================================================================
### no significant results, it is not even close.
# Second outcome: CR4 (revenue)

# treatment indicator: 1 from quarter 9 onwards, 0 before
df_CR4$treatment <- as.numeric(df_CR4$Quarter >= 9)

# running, treatment and outcome variables
run_var <- df_CR4$Quarter
treat_var <- df_CR4$treatment
out_var <- df_CR4$CR4

# cutoff on the running variable
# NOTE(review): `treatment` starts at quarter 9 while the cutoff is 8 -
# confirm which quarter is the first treated one.
cutoff <- 8

# local-linear sharp RD, triangular kernel, MSE-optimal bandwidth
rddCR4_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)
summary(rddCR4_results)Sharp RD estimates using local polynomial regression.
Number of Obs. 256
BW type mserd
Kernel Triangular
VCE method NN
Number of Obs. 112 144
Eff. Number of Obs. 32 48
Order est. (p) 1 1
Order bias (q) 2 2
BW est. (h) 2.657 2.657
BW bias (b) 4.614 4.614
rho (h/b) 0.576 0.576
Unique Obs. 7 9
=============================================================================
Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
=============================================================================
Conventional -0.037 0.152 -0.240 0.810 [-0.335 , 0.262]
Robust - - -0.249 0.803 [-0.465 , 0.360]
=============================================================================
## again no significance
# Third outcome: HHI (market cap)

# treatment indicator: 1 from quarter 9 onwards, 0 before
df_HHIMC$treatment <- as.numeric(df_HHIMC$Quarter >= 9)

# running, treatment and outcome variables
run_var <- df_HHIMC$Quarter
treat_var <- df_HHIMC$treatment
out_var <- df_HHIMC$HHIMC

# cutoff on the running variable
# NOTE(review): `treatment` starts at quarter 9 while the cutoff is 8 -
# confirm which quarter is the first treated one.
cutoff <- 8

# local-linear sharp RD, triangular kernel, MSE-optimal bandwidth
rddHHIMC_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)
summary(rddHHIMC_results)Sharp RD estimates using local polynomial regression.
Number of Obs. 256
BW type mserd
Kernel Triangular
VCE method NN
Number of Obs. 112 144
Eff. Number of Obs. 32 48
Order est. (p) 1 1
Order bias (q) 2 2
BW est. (h) 2.690 2.690
BW bias (b) 4.630 4.630
rho (h/b) 0.581 0.581
Unique Obs. 7 9
=============================================================================
Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
=============================================================================
Conventional -0.009 0.158 -0.056 0.955 [-0.319 , 0.301]
Robust - - -0.063 0.950 [-0.436 , 0.409]
=============================================================================
#again small negative effect but not even close to being significan
# Fourth outcome: CR4 (market cap)

# treatment indicator: 1 from quarter 9 onwards, 0 before
df_CR4MC$treatment <- as.numeric(df_CR4MC$Quarter >= 9)
## small negative effect (like the others) but also not significant

# running, treatment and outcome variables
run_var <- df_CR4MC$Quarter
treat_var <- df_CR4MC$treatment
out_var <- df_CR4MC$CR4MC

# cutoff on the running variable
# NOTE(review): `treatment` starts at quarter 9 while the cutoff is 8 -
# confirm which quarter is the first treated one.
cutoff <- 8

# local-linear sharp RD, triangular kernel, MSE-optimal bandwidth
rddCR4MC_results <- rdrobust(
  y = out_var,
  x = run_var,
  c = cutoff,
  p = 1,
  kernel = "tri",
  bwselect = "mserd"
)
summary(rddCR4MC_results)Sharp RD estimates using local polynomial regression.
Number of Obs. 256
BW type mserd
Kernel Triangular
VCE method NN
Number of Obs. 112 144
Eff. Number of Obs. 32 48
Order est. (p) 1 1
Order bias (q) 2 2
BW est. (h) 2.668 2.668
BW bias (b) 4.624 4.624
rho (h/b) 0.577 0.577
Unique Obs. 7 9
=============================================================================
Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
=============================================================================
Conventional -0.008 0.156 -0.054 0.957 [-0.314 , 0.297]
Robust - - -0.038 0.970 [-0.428 , 0.412]
=============================================================================
#also doesn't work
As a second method, a two-way fixed effects model (controlling for sub-industry and quarter) was employed to test Hypothesis 1. However, only the CR4 market cap model shows a small but highly significant positive treatment effect (0.038440 with a p value of 0.00158). One can only speculate about the reasons for this result. But with the more comprehensive HHI measure not being significant and CR4 only considering the top 4 firms in a market, we cannot confirm the hypothesis that the new regulatory approach has reduced market concentration in China’s digital economy as a whole.
# Two-way fixed effect models for all GICS sub-industries.
# Each model regresses one concentration measure on sub-industry dummies,
# Quarter and the treatment indicator. In the printed table Quarter appears
# as a single coefficient, i.e. it enters as a linear trend.

# (1) HHI market cap - treatment not statistically significant
reg1 <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = df_HHIMC
)

# (2) CR4 market cap - significant, but the sign is positive where a negative
# effect was expected. Possibly because CR4 only covers the top 4 firms, which
# were not hit equally, so concentration among them rose - mere speculation.
reg2 <- lm(
  formula = CR4MC ~ GICS_SubInd + Quarter + treatment,
  data = df_CR4MC
)

# (3) HHI revenue - not significant
reg3 <- lm(
  formula = HHI ~ GICS_SubInd + Quarter + treatment,
  data = df_HHI_Rev
)

# (4) CR4 revenue - as expected, not significant
reg4 <- lm(
  formula = CR4 ~ GICS_SubInd + Quarter + treatment,
  data = df_CR4
)

# all four models side by side
stargazer(
  reg1, reg2, reg3, reg4,
  type = "text",
  title = " Two-way fixed effect model Treatment effect"
)
Two-way fixed effect model Treatment effect
==========================================================================
Dependent variable:
-------------------------------------------
HHIMC CR4MC HHI CR4
(1) (2) (3) (4)
--------------------------------------------------------------------------
GICS_SubInd45102010 -0.580*** -0.738*** -0.215*** -0.607***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45102020 -0.230*** -0.018 0.054*** 0.104***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45102030 -0.481*** -0.362*** 0.317*** 0.013
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45103010 -0.566*** -0.638*** -0.220*** -0.637***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45103020 -0.489*** -0.392*** -0.103*** -0.273***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45201020 -0.566*** -0.650*** -0.148*** -0.343***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45202030 -0.381*** -0.291*** 0.010 -0.118***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45203010 -0.514*** -0.546*** -0.177*** -0.385***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45203015 -0.570*** -0.643*** -0.185*** -0.484***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45203020 -0.308*** -0.081*** 0.320*** 0.061***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45203030 -0.281*** -0.141*** -0.038*** -0.001
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45301010 -0.524*** -0.488*** -0.118*** -0.298***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd45301020 -0.516*** -0.502*** -0.199*** -0.516***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd50202010 -0.504*** -0.434*** -0.138*** -0.324***
(0.019) (0.017) (0.011) (0.012)
GICS_SubInd50203010 0.282*** 0.025 0.500*** 0.091***
(0.019) (0.017) (0.011) (0.012)
Quarter -0.005*** -0.008*** -0.001 0.001
(0.001) (0.001) (0.001) (0.001)
treatment 0.008 0.038*** 0.0004 -0.013
(0.013) (0.012) (0.008) (0.008)
Constant 0.646*** 1.010*** 0.261*** 0.888***
(0.015) (0.014) (0.009) (0.010)
--------------------------------------------------------------------------
Observations 256 256 256 256
R2 0.955 0.967 0.978 0.983
Adjusted R2 0.952 0.965 0.977 0.982
Residual Std. Error (df = 238) 0.052 0.048 0.033 0.033
F Statistic (df = 17; 238) 296.239*** 409.123*** 636.382*** 823.882***
==========================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
In a second, subsequent analysis, a subset of China’s digital economy built from the primary markets of the BATs was examined. Baidu and Tencent both belong to the Interactive Media & Services sub-industry (GICS 50203010), while Alibaba belongs to the Internet & Direct Marketing Retail sub-industry (GICS 25502020). A two-way fixed effects model yields a significant treatment effect for the HHI Market Cap model. Zooming in on the HHI MC model, we can see that, unsurprisingly, the GICS sub-industry functions as a very strong predictor of the variance in HHI MC in a given market. Nevertheless, adding the treatment effect increased the adjusted R-squared from 0.781 to 0.829; the treatment thus represents a strong and significant predictor able to explain the decrease in market concentration following the new regulatory proposal.
# Method 3: restrict the analysis to the BATs' primary markets
# primary GICS sub-industries of the BATs
bat_subinds <- c("50203010", "25502020")

# HHI market cap - significant treatment effect
dfBAT1 <- df_HHIMC[df_HHIMC$GICS_SubInd %in% bat_subinds, ]
reg5 <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT1
)

# plot HHI market cap for the two markets, dotted marker at quarter 8
ggplot(
  dfBAT1,
  aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(title = "HHI Market Cap", x = "Quarter", y = "HHIMC", color = "GICS Subindustry") +
  theme_minimal()

# CR4 market cap - not significant
dfBAT2 <- df_CR4MC[df_CR4MC$GICS_SubInd %in% bat_subinds, ]
reg6 <- lm(
  formula = CR4MC ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT2
)

# HHI revenue - not significant
dfBAT3 <- df_HHI_Rev[df_HHI_Rev$GICS_SubInd %in% bat_subinds, ]
reg7 <- lm(
  formula = HHI ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT3
)

# CR4 revenue - also not significant
dfBAT4 <- df_CR4[df_CR4$GICS_SubInd %in% bat_subinds, ]
reg8 <- lm(
  formula = CR4 ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT4
)

# all four BAT models side by side
stargazer(
  reg5, reg6, reg7, reg8,
  type = "text",
  title = "Two-way fixed effects model BATs"
)
Two-way fixed effects model BATs
=======================================================================
Dependent variable:
-----------------------------------------
HHIMC CR4MC HHI CR4
(1) (2) (3) (4)
-----------------------------------------------------------------------
GICS_SubInd50203010 0.282*** 0.025*** 0.500*** 0.091***
(0.024) (0.003) (0.017) (0.003)
Quarter 0.011** 0.001 -0.006* 0.001
(0.005) (0.001) (0.004) (0.001)
treatment -0.143*** -0.004 0.018 0.002
(0.047) (0.006) (0.033) (0.006)
Constant 0.586*** 0.958*** 0.296*** 0.883***
(0.031) (0.004) (0.022) (0.004)
-----------------------------------------------------------------------
Observations 32 32 32 32
R2 0.845 0.757 0.971 0.974
Adjusted R2 0.829 0.731 0.967 0.972
Residual Std. Error (df = 28) 0.067 0.008 0.047 0.008
F Statistic (df = 3; 28) 50.973*** 29.059*** 307.130*** 355.499***
=======================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Among the BAT models only HHI MC has a significant treatment effect.
### Zooming in on the significant HHI Market Cap model: the regressors are
### added one at a time so their contributions can be compared.
reg8a <- lm(formula = HHIMC ~ GICS_SubInd, data = dfBAT1)
reg8b <- lm(formula = HHIMC ~ GICS_SubInd + Quarter, data = dfBAT1)
reg8c <- lm(formula = HHIMC ~ GICS_SubInd + Quarter + treatment, data = dfBAT1)

stargazer(
  reg8a, reg8b, reg8c,
  type = "text",
  title = "Zoom in HHI MC Model"
)
Zoom in HHI MC Model
=========================================================================================
Dependent variable:
---------------------------------------------------------------------
HHIMC
(1) (2) (3)
-----------------------------------------------------------------------------------------
GICS_SubInd50203010 0.282*** 0.282*** 0.282***
(0.027) (0.027) (0.024)
Quarter -0.002 0.011**
(0.003) (0.005)
treatment -0.143***
(0.047)
Constant 0.608*** 0.629*** 0.586***
(0.019) (0.031) (0.031)
-----------------------------------------------------------------------------------------
Observations 32 32 32
R2 0.790 0.795 0.845
Adjusted R2 0.783 0.781 0.829
Residual Std. Error 0.075 (df = 30) 0.075 (df = 29) 0.067 (df = 28)
F Statistic 112.722*** (df = 1; 30) 56.223*** (df = 2; 29) 50.973*** (df = 3; 28)
=========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
In this chunk we do the robustness checks for the HHI MC model. The first plot indicates that the regression model is linear. Just in case, I played around with the Quarter variable, checking whether the treatment effect is still significant with Q^2, which it is. When testing with the studentized Breusch-Pagan test for heteroscedasticity, we did not have sufficient evidence to reject the null hypothesis of homoscedasticity. However, the p-value is relatively close to 0.05, indicating that there may be a possibility of heteroscedasticity. The subsequent plot indicates that the data may be a bit heteroscedastic in the higher range of the predictor variable(s). However, as an additional robustness check I reran the regression with the logged dependent variable, and the treatment effect remains significant. Further, the result of another studentized Breusch-Pagan test for the logged dependent variable suggests that there is no significant evidence of heteroscedasticity in the logged model. Lastly, plots show that the residuals as well as the error terms are normally distributed. A gap in the middle of the last plots indicates that there is a significant difference in the dependent variable between the treated and untreated groups, and this effect of the regulatory approach is not captured by the other variables in the model. In conclusion, the robustness checks confirm the validity of the treatment effect. While the model may not be perfectly linear, it passes all tests and the treatment effect persists in the robust models.
# Add the fitted values of the full HHI MC model to the data frame
dfBAT1$predicted <- predict(reg8c)

# Actual (black) versus predicted (red) HHI MC across quarters, one line per
# sub-industry
ggplot(data = dfBAT1, aes(x = Quarter, y = HHIMC, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predicted, color = "Predicted")) +
  scale_color_manual(values = c("Actual" = "black", "Predicted" = "red")) +
  labs(x = "Quarter", y = "HHIMC", title = "Regression Model Performance")

#### Linearity
# Plotting fitted values against residuals
plot(reg5, 1) # looks fine

# Additional robustness check: does the treatment effect hold when Quarter
# also enters quadratically?
# Inside a formula `^` is a crossing operator, so a bare `Quarter^2` reduces
# to `Quarter` and would refit reg5 unchanged. I() makes the square an
# arithmetic term. Any output printed below that still shows `Quarter^2` in
# its Call predates this fix and must be regenerated.
r1 <- lm(HHIMC ~ GICS_SubInd + Quarter + I(Quarter^2) + treatment, data = dfBAT1)
summary(r1)
Call:
lm(formula = HHIMC ~ GICS_SubInd + Quarter^2 + treatment, data = dfBAT1)
Residuals:
Min 1Q Median 3Q Max
-0.12754 -0.03566 0.01853 0.03991 0.15055
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.58575 0.03084 18.991 < 2e-16 ***
GICS_SubInd50203010 0.28159 0.02356 11.954 1.63e-12 ***
Quarter 0.01098 0.00514 2.136 0.04159 *
treatment -0.14291 0.04739 -3.015 0.00541 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.06663 on 28 degrees of freedom
Multiple R-squared: 0.8452, Adjusted R-squared: 0.8287
F-statistic: 50.97 on 3 and 28 DF, p-value: 1.813e-11
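# Nothing changes in the output recorded above -- but note that in an R formula
# `Quarter^2` is interpreted as plain `Quarter`; a genuine quadratic term has to
# be written as I(Quarter^2).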
#### Testing for homoscedasticity
# Studentized Breusch-Pagan test on the main model. The surrounding
# parentheses store the result and print it in one step.
(bp_test <- bptest(reg5))
studentized Breusch-Pagan test
data: reg5
BP = 7.5308, df = 3, p-value = 0.05677
# The studentized Breusch-Pagan test checks the errors of a linear regression
# for heteroscedasticity (H0: homoscedastic errors, H1: heteroscedastic
# errors). At the 0.05 level H0 cannot be rejected, but the p-value is close
# to 0.05, so some heteroscedasticity cannot be ruled out.

# Diagnostic plot 3: the trend is not flat in the upper range of the fitted
# values, which is a bit worrying and hints at non-constant residual variance
# in that range.
plot(reg5, which = 3)

# As a check, refit the model with the logged dependent variable.
r2 <- lm(
  formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment,
  data = dfBAT1
)
summary(r2)
Call:
lm(formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment,
data = dfBAT1)
Residuals:
Min 1Q Median 3Q Max
-0.21058 -0.04584 0.01383 0.04897 0.18958
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.560958 0.042618 -13.162 1.63e-13 ***
GICS_SubInd50203010 0.381510 0.032550 11.721 2.59e-12 ***
Quarter 0.018953 0.007103 2.668 0.01254 *
treatment -0.207275 0.065487 -3.165 0.00372 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09207 on 28 degrees of freedom
Multiple R-squared: 0.8404, Adjusted R-squared: 0.8233
F-statistic: 49.14 on 3 and 28 DF, p-value: 2.786e-11
# The treatment effect is still significant in the log specification.
# Repeat the Breusch-Pagan test on the logged model (stored and printed).
(bp_test_log <- bptest(r2))
studentized Breusch-Pagan test
data: r2
BP = 5.8729, df = 3, p-value = 0.118
# For the logged dependent variable the studentized Breusch-Pagan test shows
# no significant evidence of heteroscedasticity.
# Further, the treatment effect stays significant in the logged model, and the
# results are not expected to be perfectly linear anyway.

#### Normality of residuals ####
# Diagnostic plot 2: the residuals lie close to the diagonal, indicating an
# approximately normal distribution.
plot(reg5, which = 2)

# Residuals against fitted values: the points look fairly randomly scattered,
# i.e. no visible pattern in the errors and roughly constant variance.
plot(reg5$fitted.values, reg5$residuals)
# The gap in the middle indicates a significant difference in the dependent
# variable between the treated and untreated groups that is not captured by
# the other variables in the model.
# OK, all in all everything seems robust!
As a small extension, I am also checking whether the treatment effect is still present when we include an additional GICS subindustry. In the data, there is also a comparatively small publicly traded Tencent subsidiary called Tencent Music Entertainment, which is part of the Movies & Entertainment subindustry (GICS 50202010). For clarity (and consistency) it was excluded from the main analysis, because none of the other numerous subsidiaries of the BATs have their own listing. As we can see, the treatment effect holds (again only for the HHI MC model).
# Subset with the relevant subindustries for the primary markets, this time
# including Tencent Music: 50203010, 50202010, 25502020.
# HHI MC
dfBAT5 <- subset(df_HHIMC, GICS_SubInd %in% c("50203010", "50202010", "25502020"))

# Plot HHIMC per subindustry over time; the dotted vertical line sits at
# quarter 8 (presumably the start of the treatment period -- TODO confirm).
ggplot(dfBAT5, aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "HHIMC", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  theme_minimal()

# Bug fix: the model fit used to be glued onto the end of the `theme_minimal()`
# line, which R cannot parse. It is now a statement of its own.
reg9a <- lm(HHIMC ~ GICS_SubInd + Quarter + treatment, data = dfBAT5)
# significant again
#### Same extension for the remaining three concentration measures
# GICS codes of the primary markets including Tencent Music
bat_codes_with_tme <- c("50203010", "50202010", "25502020")

# CR4 market cap -- treatment not significant
dfBAT6 <- subset(df_CR4MC, GICS_SubInd %in% bat_codes_with_tme)
reg9b <- lm(formula = CR4MC ~ GICS_SubInd + Quarter + treatment, data = dfBAT6)

# HHI revenue -- treatment not significant
dfBAT7 <- subset(df_HHI_Rev, GICS_SubInd %in% bat_codes_with_tme)
reg9c <- lm(formula = HHI ~ GICS_SubInd + Quarter + treatment, data = dfBAT7)

# CR4 revenue -- treatment not significant at the 5% level (the recorded
# table marks it at the 10% level only)
dfBAT8 <- subset(df_CR4, GICS_SubInd %in% bat_codes_with_tme)
reg9d <- lm(formula = CR4 ~ GICS_SubInd + Quarter + treatment, data = dfBAT8)

# All four models side by side
stargazer(
  reg9a, reg9b, reg9c, reg9d,
  title = "BAT Model Including Tencent Music",
  type = "text"
)
BAT Model Including Tencent Music
=========================================================================
Dependent variable:
-------------------------------------------
HHIMC CR4MC HHI CR4
(1) (2) (3) (4)
-------------------------------------------------------------------------
GICS_SubInd50202010 -0.504*** -0.434*** -0.138*** -0.324***
(0.021) (0.011) (0.015) (0.009)
GICS_SubInd50203010 0.282*** 0.025** 0.500*** 0.091***
(0.021) (0.011) (0.015) (0.009)
Quarter 0.007* 0.0002 -0.002 0.004***
(0.004) (0.002) (0.003) (0.002)
treatment -0.090** 0.003 0.001 -0.026*
(0.035) (0.018) (0.025) (0.015)
Constant 0.591*** 0.959*** 0.272*** 0.865***
(0.024) (0.013) (0.017) (0.010)
-------------------------------------------------------------------------
Observations 48 48 48 48
R2 0.971 0.980 0.979 0.983
Adjusted R2 0.968 0.978 0.977 0.981
Residual Std. Error (df = 43) 0.060 0.032 0.042 0.025
F Statistic (df = 4; 43) 357.124*** 530.292*** 502.808*** 604.128***
=========================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
The following models explore whether market concentration can function as a predictor of profits in the digital economy by looking at both total profits and profit margins. Looking at total profits by firm first, all concentration measures – HHI Rev, CR4 Rev, HHI MC, CR4 MC – have a positive and statistically significant relationship with EBIT. The crucial problem for the concentration measures as predictors is that the R-squared values in the regression outputs are very low (R-squared of around 1 percent), indicating that the market concentration measures included in the model do not have a strong relationship with firms’ profits and that other factors play a more important role. The low explanatory value holds true for a time-fixed model (controlling for Quarter) and a two-way fixed effects model (controlling additionally for GICS Subindustry); in the two-way specification the concentration coefficients are no longer statistically significant.
### Testing hypothesis 2: Lower market concentration is correlated with a
### reduction in firm's profits

# Data set with the market concentration measures and profits. Columns are
# picked by position (2, 3 and 36 to 115 of df_wide).
# NOTE(review): positional selection breaks silently if df_wide's column order
# changes -- presumably these are Name, GICS_SubInd, the quarterly EBIT columns
# and the four concentration measures; confirm.
df_EBIT <- df_wide %>% select(2, 3, 36:115)

# Turn one family of quarterly columns (all sharing `prefix`) into long format:
# one row per firm and quarter, with the quarter number as a numeric column
# `Quarter` and the values stored in a column called `value_name`.
quarterly_to_long <- function(wide, prefix, value_name) {
  long <- wide %>%
    pivot_longer(
      cols = starts_with(prefix),
      names_to = "Quarter",
      values_to = value_name
    ) %>%
    select(Name, GICS_SubInd, Quarter, all_of(value_name))
  # strip the column prefix so that only the quarter number is left
  long$Quarter <- as.numeric(sub(prefix, "", long$Quarter))
  long
}

# One long table per variable
df_EBIT_long <- quarterly_to_long(df_EBIT, "EBIT_Q", "EBIT")
df_HHIRev_long <- quarterly_to_long(df_EBIT, "HHIRev_SubInd_Q", "HHIRev")
df_CR4Rev_long <- quarterly_to_long(df_EBIT, "CR4Rev_Subind_Q", "CR4Rev")
df_HHIMC_long <- quarterly_to_long(df_EBIT, "HHIMC_SubInd_Q", "HHIMC")
df_CR4MC_long <- quarterly_to_long(df_EBIT, "CR4MC_Subind_Q", "CR4MC")

# Merge everything onto the EBIT panel by firm, subindustry and quarter
panel_keys <- c("Name", "GICS_SubInd", "Quarter")
EBIT_merg <- df_EBIT_long %>%
  left_join(df_HHIRev_long, by = panel_keys) %>%
  left_join(df_CR4Rev_long, by = panel_keys) %>%
  left_join(df_HHIMC_long, by = panel_keys) %>%
  left_join(df_CR4MC_long, by = panel_keys)
### Naive models: firm-level EBIT on one concentration measure at a time

# HHI revenue: highly significant. Moving HHI from 0 to 1 (maybe use the
# 1-10000 scale instead) corresponds to about 9.97 billion higher profits in
# the hypothetical case of a fully concentrated market -- but the explanatory
# value is very low (R-squared around 1 percent).
reg10a <- lm(formula = EBIT ~ HHIRev, data = EBIT_merg)

# CR4 revenue: same picture (higher CR4, higher profits), with an even lower
# R-squared below 1 percent.
reg10b <- lm(formula = EBIT ~ CR4Rev, data = EBIT_merg)

# HHI market cap: same result, slightly higher R-squared.
reg10c <- lm(formula = EBIT ~ HHIMC, data = EBIT_merg)

# CR4 market cap: same result.
reg10d <- lm(formula = EBIT ~ CR4MC, data = EBIT_merg)

stargazer(
  reg10a, reg10b, reg10c, reg10d,
  title = "Naive Total Profits Regression Model",
  type = "text"
)
Naive Total Profits Regression Model
======================================================================================================================
Dependent variable:
-------------------------------------------------------------------------------------
EBIT
(1) (2) (3) (4)
----------------------------------------------------------------------------------------------------------------------
HHIRev 9,969,480,093.000***
(707,071,810.000)
CR4Rev 5,048,708,631.000***
(551,009,734.000)
HHIMC 9,268,733,745.000***
(635,073,963.000)
CR4MC 6,219,981,258.000***
(571,910,343.000)
Constant -359,884,861.000*** -1,636,945,457.000*** -197,050,969.000 -1,838,777,713.000***
(138,736,441.000) (295,082,969.000) (130,188,360.000) (272,172,156.000)
----------------------------------------------------------------------------------------------------------------------
Observations 13,962 13,962 13,962 13,962
R2 0.014 0.006 0.015 0.008
Adjusted R2 0.014 0.006 0.015 0.008
Residual Std. Error (df = 13960) 12,678,529,762.000 12,730,264,525.000 12,672,174,394.000 12,714,734,150.000
F Statistic (df = 1; 13960) 198.801*** 83.954*** 213.006*** 118.283***
======================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# All naive models have very low explanatory power.
# Check whether this holds when additionally controlling for time.
# NOTE(review): Quarter is numeric here (it is converted with as.numeric()
# when the long tables are built), so it enters as a single linear time trend
# -- the recorded table shows one `Quarter` coefficient. Genuine time fixed
# effects would need factor(Quarter); confirm which one is intended.

# HHI revenue: still a positive effect of concentration on profits, still low
# explanatory power.
reg11a <- lm(formula = EBIT ~ HHIRev + Quarter, data = EBIT_merg)

# CR4 revenue
reg11b <- lm(formula = EBIT ~ CR4Rev + Quarter, data = EBIT_merg)

# HHI market cap: same result, slightly higher R-squared.
reg11c <- lm(formula = EBIT ~ HHIMC + Quarter, data = EBIT_merg)

# CR4 market cap
reg11d <- lm(formula = EBIT ~ CR4MC + Quarter, data = EBIT_merg)

stargazer(
  reg11a, reg11b, reg11c, reg11d,
  title = "Time-fixed regression model total profits",
  type = "text"
)
Time-fixed regression model total profits
======================================================================================================================
Dependent variable:
-------------------------------------------------------------------------------------
EBIT
(1) (2) (3) (4)
----------------------------------------------------------------------------------------------------------------------
HHIRev 9,972,829,773.000***
(707,073,846.000)
CR4Rev 5,043,189,030.000***
(551,045,076.000)
HHIMC 9,292,806,672.000***
(635,277,881.000)
CR4MC 6,260,620,865.000***
(572,499,263.000)
Quarter 25,734,391.000 21,924,552.000 33,525,476.000 36,326,300.000
(23,648,451.000) (23,746,403.000) (23,644,118.000) (23,740,334.000)
Constant -593,145,950.000** -1,832,566,667.000*** -503,186,769.000** -2,185,222,297.000***
(255,333,272.000) (363,272,053.000) (252,116,368.000) (354,023,950.000)
----------------------------------------------------------------------------------------------------------------------
Observations 13,962 13,962 13,962 13,962
R2 0.014 0.006 0.015 0.009
Adjusted R2 0.014 0.006 0.015 0.008
Residual Std. Error (df = 13959) 12,678,446,120.000 12,730,331,805.000 12,671,715,779.000 12,714,123,339.000
F Statistic (df = 2; 13959) 99.994*** 42.403*** 107.516*** 60.318***
======================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Two-way specification: time control plus GICS subindustry dummies.
# According to the recorded table, none of the four concentration coefficients
# is statistically significant once the subindustry dummies are included (the
# HHI coefficients even turn negative), and R-squared stays low (0.025).

# HHI revenue
reg12a <- lm(formula = EBIT ~ HHIRev + Quarter + GICS_SubInd, data = EBIT_merg)

# CR4 revenue
reg12b <- lm(formula = EBIT ~ CR4Rev + Quarter + GICS_SubInd, data = EBIT_merg)

# HHI market cap
reg12c <- lm(formula = EBIT ~ HHIMC + Quarter + GICS_SubInd, data = EBIT_merg)

# CR4 market cap
reg12d <- lm(formula = EBIT ~ CR4MC + Quarter + GICS_SubInd, data = EBIT_merg)

stargazer(
  reg12a, reg12b, reg12c, reg12d,
  title = "Two-way fixed effects model total profits ",
  type = "text"
)
Two-way fixed effects model total profits
========================================================================================================================
Dependent variable:
---------------------------------------------------------------------------------------
EBIT
(1) (2) (3) (4)
------------------------------------------------------------------------------------------------------------------------
HHIRev -2,415,021,042.000
(5,205,525,905.000)
CR4Rev 479,461,543.000
(3,354,860,381.000)
HHIMC -2,164,839,308.000
(3,606,344,556.000)
CR4MC 1,123,112,775.000
(2,460,722,039.000)
Quarter 19,545,723.000 19,681,242.000 16,706,081.000 22,771,100.000
(23,578,854.000) (23,633,794.000) (24,171,825.000) (24,365,414.000)
GICS_SubInd45102010 -584,207,090.000 234,849,696.000 -1,333,106,249.000 774,717,364.000
(1,396,142,449.000) (2,193,394,789.000) (2,275,545,105.000) (1,992,978,141.000)
GICS_SubInd45102020 445,667,812.000 270,969,761.000 -212,284,361.000 345,196,024.000
(1,682,256,170.000) (1,694,463,397.000) (1,881,667,415.000) (1,661,075,666.000)
GICS_SubInd45102030 830,633,091.000 73,448,698.000 -994,179,674.000 500,234,896.000
(1,905,718,755.000) (1,000,052,745.000) (2,046,611,156.000) (1,362,144,641.000)
GICS_SubInd45103010 -516,036,290.000 330,383,856.000 -1,219,756,582.000 741,752,135.000
(1,405,719,770.000) (2,283,334,952.000) (2,216,793,486.000) (1,758,674,162.000)
GICS_SubInd45103020 -71,331,687.000 326,570,193.000 -895,621,486.000 644,020,358.000
(1,072,201,244.000) (1,305,376,996.000) (2,027,930,694.000) (1,343,090,152.000)
GICS_SubInd45201020 -199,396,974.000 330,622,259.000 -1,079,324,794.000 897,733,036.000
(1,112,013,145.000) (1,392,640,320.000) (2,218,257,495.000) (1,784,544,673.000)
GICS_SubInd45202030 1,330,985,604.000 1,374,983,230.000 475,097,003.000 1,644,830,644.000
(886,989,526.000) (974,130,543.000) (1,659,569,957.000) (1,140,580,134.000)
GICS_SubInd45203010 -112,234,750.000 511,427,915.000 -810,336,196.000 944,438,045.000
(1,211,160,423.000) (1,511,970,088.000) (2,038,437,000.000) (1,556,662,672.000)
GICS_SubInd45203015 227,389,032.000 910,096,306.000 -574,338,857.000 1,402,359,536.000
(1,237,435,370.000) (1,790,345,210.000) (2,223,123,959.000) (1,760,213,759.000)
GICS_SubInd45203020 5,854,035,096.000*** 5,068,727,043.000*** 4,400,640,541.000*** 5,192,967,555.000***
(2,048,326,751.000) (1,254,639,632.000) (1,697,313,767.000) (1,256,351,458.000)
GICS_SubInd45203030 691,185,744.000 788,799,253.000 147,655,019.000 954,767,204.000
(1,170,402,791.000) (1,151,515,483.000) (1,570,090,571.000) (1,207,838,227.000)
GICS_SubInd45301010 650,211,316.000 1,102,632,896.000 -206,563,640.000 1,522,265,119.000
(1,124,907,413.000) (1,381,717,869.000) (2,139,246,517.000) (1,543,523,371.000)
GICS_SubInd45301020 431,306,471.000 1,168,406,030.000 -212,945,021.000 1,483,701,820.000
(1,314,614,944.000) (1,902,907,062.000) (2,044,947,089.000) (1,462,038,318.000)
GICS_SubInd50202010 -955,436,175.000 -459,636,869.000 -1,726,395,724.000 -124,318,990.000
(1,168,272,627.000) (1,416,705,509.000) (2,062,622,377.000) (1,407,726,416.000)
GICS_SubInd50203010 12,818,297,784.000*** 11,599,444,560.000*** 12,214,504,712.000*** 11,615,368,924.000***
(2,706,713,162.000) (995,451,703.000) (1,345,199,626.000) (951,024,910.000)
Constant 547,355,879.000 -497,109,213.000 1,291,821,575.000 -1,181,522,162.000
(1,532,860,892.000) (3,064,500,265.000) (2,393,762,998.000) (2,543,329,396.000)
------------------------------------------------------------------------------------------------------------------------
Observations 13,962 13,962 13,962 13,962
R2 0.025 0.025 0.025 0.025
Adjusted R2 0.023 0.023 0.023 0.023
Residual Std. Error (df = 13944) 12,617,378,083.000 12,617,466,220.000 12,617,312,433.000 12,617,381,213.000
F Statistic (df = 17; 13944) 20.728*** 20.716*** 20.737*** 20.728***
========================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
Next we look at profit margins, which are calculated on the basis of EBIT and revenue. A few observations with revenue equal to 0 or NA were excluded, because otherwise the profit margins become infinite. Running the same regression models on profit margins once more results in a statistically significant relationship between the concentration measures and the dependent variable. However, this time it is negative. Nevertheless, the models again have a very low R-squared value, which also holds true for the time-fixed model and the two-way fixed effects model. All in all, market concentration explains only around 0.1 percent of the variation in profit margins.
# ---- Profit margins ----
# Revenue in long format. starts_with() ignores case by default, so the
# "REV_Q" prefix also selects columns spelled Rev_Q*; the quarter number is
# then extracted by stripping "Rev_Q" from the column name.
df_REV_long <- df_wide %>%
  pivot_longer(
    cols = starts_with("REV_Q"),
    names_to = "Quarter",
    values_to = "REV"
  ) %>%
  select(Name, GICS_SubInd, Quarter, REV) %>%
  mutate(Quarter = as.numeric(sub("Rev_Q", "", Quarter)))

# EBIT + revenue -> profit margin in percent (two decimals, NA if either
# input is missing), then attach the four concentration measures.
margin_keys <- c("Name", "GICS_SubInd", "Quarter")
df_Profitmarg <- df_EBIT_long %>%
  left_join(df_REV_long, by = margin_keys) %>%
  mutate(
    Profitmarg = ifelse(
      is.na(REV) | is.na(EBIT),
      NA,
      round((EBIT / REV) * 100, 2)
    )
  ) %>%
  left_join(df_HHIRev_long, by = margin_keys) %>%
  left_join(df_CR4Rev_long, by = margin_keys) %>%
  left_join(df_HHIMC_long, by = margin_keys) %>%
  left_join(df_CR4MC_long, by = margin_keys)

# A revenue of 0 produces margins of +/-Inf; those rows have to be dropped
# before a regression can be run.
df_Profitmarg <- df_Profitmarg[!is.infinite(df_Profitmarg$Profitmarg), ]

# Effect of HHIRev on the profit margin
# NOTE(review): the recorded residuals reach about -24.6 million, so a few
# observations with near-zero revenue dominate this fit.
reg13a <- lm(
  formula = Profitmarg ~ HHIRev,
  data = df_Profitmarg,
  na.action = na.omit
)
summary(reg13a)
Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)
Residuals:
Min 1Q Median 3Q Max
-24578116 -1352 -460 1445 35594
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3669 2303 1.593 0.111121
HHIRev -44692 11969 -3.734 0.000189 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 209000 on 13853 degrees of freedom
(4335 observations deleted due to missingness)
Multiple R-squared: 0.001006, Adjusted R-squared: 0.0009334
F-statistic: 13.94 on 1 and 13853 DF, p-value: 0.0001892
# HHIRev: negative coefficient and an extremely weak R-squared.

# CR4 revenue: same negative coefficient and weak R-squared
reg13b <- lm(
  formula = Profitmarg ~ CR4Rev,
  data = df_Profitmarg,
  na.action = na.omit
)

# HHI market cap: again negative and weak
reg13c <- lm(
  formula = Profitmarg ~ HHIMC,
  data = df_Profitmarg,
  na.action = na.omit
)

# CR4 market cap: again negative and weak
reg13d <- lm(
  formula = Profitmarg ~ CR4MC,
  data = df_Profitmarg,
  na.action = na.omit
)

stargazer(
  reg13a, reg13b, reg13c, reg13d,
  title = "Naive Model Profit Margins firm level",
  type = "text"
)
Naive Model Profit Margins firm level
===========================================================================================
Dependent variable:
----------------------------------------------------------
Profitmarg
(1) (2) (3) (4)
-------------------------------------------------------------------------------------------
HHIRev -44,691.610***
(11,968.610)
CR4Rev -22,710.000**
(9,141.645)
HHIMC -46,979.970***
(10,824.840)
CR4MC -28,237.340***
(9,535.257)
Constant 3,669.307 9,477.674* 3,547.671 10,485.920**
(2,302.995) (4,878.042) (2,161.850) (4,515.542)
-------------------------------------------------------------------------------------------
Observations 13,855 13,855 13,855 13,855
R2 0.001 0.0004 0.001 0.001
Adjusted R2 0.001 0.0004 0.001 0.001
Residual Std. Error (df = 13853) 208,950.900 209,009.500 208,914.100 208,989.900
F Statistic (df = 1; 13853) 13.943*** 6.171** 18.836*** 8.770***
===========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
### Same test with the time control added
# Effect of HHIRev on the profit margin, controlling for Quarter
reg14a <- lm(Profitmarg ~ HHIRev + Quarter, data = df_Profitmarg, na.action = na.omit)
# Bug fix: summarise the model that was just fitted. The previous call printed
# summary(reg13a), i.e. the naive model again, so the output recorded after
# this line still shows reg13a and has to be regenerated.
summary(reg14a)
Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)
Residuals:
Min 1Q Median 3Q Max
-24578116 -1352 -460 1445 35594
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3669 2303 1.593 0.111121
HHIRev -44692 11969 -3.734 0.000189 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 209000 on 13853 degrees of freedom
(4335 observations deleted due to missingness)
Multiple R-squared: 0.001006, Adjusted R-squared: 0.0009334
F-statistic: 13.94 on 1 and 13853 DF, p-value: 0.0001892
# CR4 revenue with time control: same negative coefficient and weak R-squared
reg14b <- lm(
  formula = Profitmarg ~ CR4Rev + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

# HHI market cap: again negative and weak
reg14c <- lm(
  formula = Profitmarg ~ HHIMC + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

# CR4 market cap: again negative and weak
reg14d <- lm(
  formula = Profitmarg ~ CR4MC + Quarter,
  data = df_Profitmarg,
  na.action = na.omit
)

stargazer(
  reg14a, reg14b, reg14c, reg14d,
  title = "Time-fixed Model Profit Margins firm level",
  type = "text"
)
Time-fixed Model Profit Margins firm level
===========================================================================================
Dependent variable:
----------------------------------------------------------
Profitmarg
(1) (2) (3) (4)
-------------------------------------------------------------------------------------------
HHIRev -44,718.890***
(11,968.560)
CR4Rev -22,595.640**
(9,142.254)
HHIMC -47,301.970***
(10,828.050)
CR4MC -28,768.040***
(9,545.028)
Quarter -426.638 -412.088 -466.249 -477.446
(391.275) (391.412) (391.323) (391.750)
Constant 7,534.872* 13,151.340** 7,805.198* 15,039.170**
(4,227.508) (5,997.554) (4,176.391) (5,860.637)
-------------------------------------------------------------------------------------------
Observations 13,855 13,855 13,855 13,855
R2 0.001 0.001 0.001 0.001
Adjusted R2 0.001 0.0004 0.001 0.001
Residual Std. Error (df = 13852) 208,949.500 209,008.700 208,910.900 208,986.200
F Statistic (df = 2; 13852) 7.566*** 3.640** 10.128*** 5.128***
===========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
### Same test for the two-way fixed effects model
# Effect of HHIRev on the profit margin, controlling for Quarter and
# GICS subindustry
reg15a <- lm(Profitmarg ~ HHIRev + Quarter + GICS_SubInd, data = df_Profitmarg, na.action = na.omit)
# Bug fix: summarise the model that was just fitted. The previous call printed
# summary(reg13a), i.e. the naive model again, so the output recorded after
# this line still shows reg13a and has to be regenerated.
summary(reg15a)
Call:
lm(formula = Profitmarg ~ HHIRev, data = df_Profitmarg, na.action = na.omit)
Residuals:
Min 1Q Median 3Q Max
-24578116 -1352 -460 1445 35594
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3669 2303 1.593 0.111121
HHIRev -44692 11969 -3.734 0.000189 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 209000 on 13853 degrees of freedom
(4335 observations deleted due to missingness)
Multiple R-squared: 0.001006, Adjusted R-squared: 0.0009334
F-statistic: 13.94 on 1 and 13853 DF, p-value: 0.0001892
# Remaining two-way models. According to the recorded table, the picture
# differs from the naive models: HHIRev turns positive and significant, while
# CR4Rev, HHIMC and CR4MC are not significant. R-squared stays extremely low.

# CR4 revenue
reg15b <- lm(
  formula = Profitmarg ~ CR4Rev + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

# HHI market cap
reg15c <- lm(
  formula = Profitmarg ~ HHIMC + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

# CR4 market cap
reg15d <- lm(
  formula = Profitmarg ~ CR4MC + Quarter + GICS_SubInd,
  data = df_Profitmarg,
  na.action = na.omit
)

stargazer(
  reg15a, reg15b, reg15c, reg15d,
  title = "Two-way fixed effects Model Profit Margins firm level",
  type = "text"
)
Two-way fixed effects Model Profit Margins firm level
=============================================================================================
Dependent variable:
------------------------------------------------------------
Profitmarg
(1) (2) (3) (4)
---------------------------------------------------------------------------------------------
HHIRev 278,765.200***
(87,654.730)
CR4Rev 6,526.000
(55,791.600)
HHIMC 96,763.700
(60,528.570)
CR4MC -10,964.380
(40,869.140)
Quarter -368.690 -409.320 -264.249 -433.626
(391.703) (392.830) (401.520) (405.176)
GICS_SubInd45102010 60,616.850*** 3,674.814 56,768.330 -8,405.311
(23,374.200) (36,454.940) (38,125.930) (33,084.520)
GICS_SubInd45102020 -14,807.360 -895.254 23,531.300 -486.372
(27,854.320) (28,058.330) (31,245.720) (27,500.770)
GICS_SubInd45102030 -87,141.380*** -306.370 47,670.850 -4,369.395
(31,940.970) (16,556.810) (34,237.680) (22,585.050)
GICS_SubInd45103010 62,156.470*** 3,980.556 55,410.220 -7,195.850
(23,546.240) (37,956.660) (37,145.310) (29,196.380)
GICS_SubInd45103020 30,360.690* 1,585.560 48,445.120 -4,656.484
(17,892.970) (21,729.630) (33,988.960) (22,323.070)
GICS_SubInd45201020 41,638.590** 1,682.935 55,113.340 -7,696.491
(18,571.880) (23,133.560) (37,169.630) (29,623.490)
GICS_SubInd45202030 -1,836.300 559.351 37,407.580 -3,422.752
(14,684.520) (16,144.240) (27,743.060) (18,907.740)
GICS_SubInd45203010 50,238.580** 2,344.318 50,537.200 -6,246.373
(20,255.880) (25,115.600) (34,147.150) (25,834.530)
GICS_SubInd45203015 51,921.320** 2,973.927 55,833.980 -7,239.905
(20,700.800) (29,750.090) (37,252.440) (29,219.110)
GICS_SubInd45203020 -87,662.500** -682.504 30,837.550 -1,229.966
(34,277.860) (20,773.980) (28,279.830) (20,802.030)
GICS_SubInd45203030 11,030.320 -236.770 28,349.860 -1,863.987
(19,413.750) (19,094.400) (26,161.800) (20,026.620)
GICS_SubInd45301010 35,149.770* 2,039.072 51,921.270 -5,519.396
(18,728.200) (22,933.270) (35,815.820) (25,607.310)
GICS_SubInd45301020 56,408.390** 3,293.565 50,582.750 -5,574.180
(22,004.750) (31,625.800) (34,250.580) (24,256.760)
GICS_SubInd50202010 38,778.630** 1,639.503 49,196.350 -5,260.829
(19,561.760) (23,592.840) (34,579.990) (23,431.000)
GICS_SubInd50203010 -202,347.800*** -67,368.710*** -92,281.040*** -66,520.720***
(45,561.240) (16,848.830) (22,656.730) (16,124.670)
Constant -67,600.750*** -1,911.130 -57,085.130 14,696.950
(25,685.430) (50,951.560) (40,106.880) (42,237.800)
---------------------------------------------------------------------------------------------
Observations 13,855 13,855 13,855 13,855
R2 0.003 0.003 0.003 0.003
Adjusted R2 0.002 0.001 0.002 0.001
Residual Std. Error (df = 13837) 208,816.700 208,892.900 208,873.700 208,892.400
F Statistic (df = 17; 13837) 2.810*** 2.215*** 2.365*** 2.218***
=============================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# again very low explanatory value
In a preliminary step, the statistical relationship between market concentration and the size of digital markets was examined. Accordingly, firm-level revenue and market capitalisation figures were aggregated per quarter and GICS subindustry. In a time-fixed model (i.e., controlling for Quarter), aggregated market capitalisation was regressed on the two MC concentration measures and, correspondingly, aggregated revenue on the two revenue-based concentration measures. The results show a clear statistical relationship between the variables: the more concentrated a market, the larger the aggregated revenue and market capitalisation of that market. While all concentration measures are highly significant, it is again market capitalisation that serves as the better predictor of market size (in particular HHI MC). While this represents merely a naive model, the adjusted R-squared values (HHI Rev 0.066, CR4 Rev 0.031, HHI MC 0.296 (!), CR4 MC 0.045) give a clear indication that market concentration as a competition problem is more pronounced in large markets. The relationship between the size of a market and market concentration represents a good opportunity for further, more in-depth research!
# Testing Hypothesis 3:
# Preliminary analysis: reduced market concentration has a statistical effect
# on the growth of China's digital economy.

# Aggregate market cap and revenue per subindustry: one sum per quarter
# column, grouped by GICS_SubInd.
# NOTE(review): the formula method of aggregate() defaults to
# na.action = na.omit, so any firm with an NA in one of these 32 columns is
# left out of every quarterly sum -- confirm this restriction is intended.
size_cols <- c(paste0("MC_Q", 1:16), paste0("Rev_Q", 1:16))
size_formula <- as.formula(
  paste0("cbind(", paste(size_cols, collapse = ", "), ") ~ GICS_SubInd")
)
df_grow <- aggregate(size_formula, data = df_wide, FUN = sum)

# Revenue-based concentration measures: attach the HHI and CR4 columns from
# the firm-level table ...
rev_conc_cols <- c(
  paste0("HHIRev_SubInd_Q", 1:16),
  paste0("CR4Rev_Subind_Q", 1:16)
)
df_grow_Rev <- merge(
  df_grow,
  df_wide[, c("GICS_SubInd", rev_conc_cols)],
  by = "GICS_SubInd",
  all.x = TRUE
)
# ... and keep only the first row per subindustry, because the merge returns
# one row per matching df_wide row.
df_grow_Rev <- df_grow_Rev[!duplicated(df_grow_Rev[["GICS_SubInd"]]), ]
# Long format: one table per variable with a numeric Quarter column.

# aggregated revenue
df_grow_1 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "Rev"
  ) %>%
  select(GICS_SubInd, Quarter, Rev) %>%
  mutate(Quarter = as.numeric(gsub("Rev_Q", "", Quarter)))

# HHI (revenue)
df_grow_2 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("HHIRev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIRev"
  ) %>%
  select(GICS_SubInd, Quarter, HHIRev) %>%
  mutate(Quarter = as.numeric(gsub("HHIRev_SubInd_Q", "", Quarter)))

# CR4 (revenue). starts_with() matches case-insensitively by default, so the
# "SubInd" spelling still selects the CR4Rev_Subind_Q* columns; gsub() is
# case-sensitive and therefore uses the spelling of the actual column names.
df_grow_3 <- df_grow_Rev %>%
  pivot_longer(
    cols = starts_with("CR4Rev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "CR4Rev"
  ) %>%
  select(GICS_SubInd, Quarter, CR4Rev) %>%
  mutate(Quarter = as.numeric(gsub("CR4Rev_Subind_Q", "", Quarter)))

# Full outer merge of the three long tables on quarter and subindustry
dfgrow_Rev_long <- Reduce(
  function(left, right) {
    merge(left, right, by = c("Quarter", "GICS_SubInd"), all = TRUE)
  },
  list(df_grow_1, df_grow_2, df_grow_3)
)
## Effect of market concentration on aggregated revenue

# With Quarter as control, the influence of HHI (Rev) on total revenue is
# highly significant: the higher the market concentration, the higher the
# revenue of a market (???)
reg16a <- lm(formula = Rev ~ HHIRev + Quarter, data = dfgrow_Rev_long)
# summary(reg16a)

# CR4 also has a positive coefficient but is not significant
reg16b <- lm(formula = Rev ~ CR4Rev + Quarter, data = dfgrow_Rev_long)
# summary(reg16b)
### now we are doing the same for market cap concentration measures
# Attach the subindustry-level HHI and CR4 (market cap) columns for Q1..Q16.
# The 32 column names are generated instead of typed out; the resulting vector
# is identical to the hand-written list (note "SubInd" vs. "Subind").
mc_concentration_cols <- c(
  "GICS_SubInd",
  paste0("HHIMC_SubInd_Q", seq_len(16)),
  paste0("CR4MC_Subind_Q", seq_len(16))
)
df_grow_MC <- merge(df_grow, df_wide[, mc_concentration_cols],
                    by = "GICS_SubInd", all.x = TRUE)

# Keep a single row per subindustry (the merge can repeat each subindustry).
df_grow_MC <- df_grow_MC[!duplicated(df_grow_MC[["GICS_SubInd"]]), ]

# Transform into long format: one row per subindustry and quarter.

# total market cap
df_grow_4 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(GICS_SubInd, Quarter, MC)
df_grow_4$Quarter <- as.numeric(gsub("MC_Q", "", df_grow_4$Quarter))

# HHI based on market cap
df_grow_5 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(GICS_SubInd, Quarter, HHIMC)
df_grow_5$Quarter <- as.numeric(gsub("HHIMC_SubInd_Q", "", df_grow_5$Quarter))

# CR4 based on market cap
# Selector spelled like the real column names ("Subind") so it no longer
# depends on starts_with() ignoring case.
df_grow_6 <- df_grow_MC %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(GICS_SubInd, Quarter, CR4MC)
df_grow_6$Quarter <- as.numeric(gsub("CR4MC_Subind_Q", "", df_grow_6$Quarter))

#merging MC data sets
dfgrow_MC_long <- merge(
  merge(df_grow_4, df_grow_5, by = c("Quarter", "GICS_SubInd"), all = TRUE),
  df_grow_6,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)

#regression market concentration variables on total MC with Quarter as control
reg16c <- lm(MC ~ HHIMC + Quarter , data = dfgrow_MC_long)
#summary(reg16c)
# same thing: even when controlling for Quarter, the influence of HHI (MC) on
# total market cap is highly significant
# the higher the market concentration, the higher the MC of a market
reg16d <- lm(MC ~ CR4MC + Quarter , data = dfgrow_MC_long)
#summary(reg16d)
## this time the CR4 concentration measure is also highly significant
#conclusion: larger markets are more concentrated
stargazer(reg16a, reg16b, reg16c, reg16d, title = "Naive Model Market Concentration and Market Size", type = "text")
Naive Model Market Concentration and Market Size
===============================================================================================================================
Dependent variable:
------------------------------------------------------------------------------------------------
Rev MC
(1) (2) (3) (4)
-------------------------------------------------------------------------------------------------------------------------------
HHIRev 385,872,303,731.000***
(121,304,982,623.000)
CR4Rev 95,939,270,911.000
(105,624,904,965.000)
HHIMC 19,122,860,336,600.000***
(1,870,266,392,250.000)
CR4MC 6,638,910,245,635.000***
(2,043,512,598,127.000)
Quarter 18,046,663,164.000*** 17,646,246,734.000*** 286,920,484,981.000*** 233,813,978,575.000**
(5,624,617,637.000) (5,725,099,833.000) (96,494,394,910.000) (112,339,531,872.000)
Constant 290,772,698,669.000*** 320,086,731,004.000*** 230,519,542,215.000 913,729,570,006.000
(61,672,423,096.000) (88,980,825,298.000) (1,044,494,198,920.000) (1,681,848,350,024.000)
-------------------------------------------------------------------------------------------------------------------------------
Observations 256 256 256 256
R2 0.073 0.039 0.301 0.052
Adjusted R2 0.066 0.031 0.296 0.045
Residual Std. Error (df = 253) 414,729,794,806.000 422,254,208,708.000 7,093,076,372,427.000 8,261,598,569,857.000
F Statistic (df = 2; 253) 9.966*** 5.145*** 54.565*** 6.968***
===============================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
Moving on to the main analysis, the effect of the new regulatory approach on growth rates was tested (1) on the aggregated market level and (2) on the firm level. When plotting the calculated growth rates per GICS subindustry, we can observe a general negative trend for market capitalisation growth rates and a cyclical yet stable trajectory for revenue growth rates. In all cases, no abnormal patterns can be observed around the cut-off, with the notable exception of the 45102020 subindustry (Data Processing & Outsourced Services), which drops off dramatically in quarter 9 before quickly rebounding in the market capitalisation graph. A time-fixed model shows no significant treatment effect for either market cap or revenue growth rates. An interaction term for the different market concentration measures was included to test whether a part of the treatment effect was mediated by market concentration, with no significant results.
##### now we calculate the growth rates
# Quarter-on-quarter growth per subindustry: (x_t - x_{t-1}) / x_{t-1}.
# lag() works on row order, so the rows are sorted by Quarter inside each
# subindustry first instead of trusting the order left by pivot_longer().
# The first quarter of every subindustry has no predecessor and becomes NA.
df_growthrateMC <- df_grow_4 %>%
  group_by(GICS_SubInd) %>%
  arrange(Quarter, .by_group = TRUE) %>%
  mutate(Growth_MC = (MC - lag(MC)) / lag(MC)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_MC)

df_growthrateRev <- df_grow_1 %>%
  group_by(GICS_SubInd) %>%
  arrange(Quarter, .by_group = TRUE) %>%
  mutate(Growth_Rev = (Rev - lag(Rev)) / lag(Rev)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_Rev)

#merging MC data sets
dfgrow_MC_long <- merge(dfgrow_MC_long, df_growthrateMC,
                        by = c("Quarter", "GICS_SubInd"), all = TRUE)
#merging Rev data sets
dfgrow_Rev_long <- merge(dfgrow_Rev_long, df_growthrateRev,
                         by = c("Quarter", "GICS_SubInd"), all = TRUE)
##### Let's plot growth rates of Market Cap and Rev
# One line per GICS subindustry. The dotted vertical line sits at quarter 8,
# the last quarter before the treatment dummy (Quarter >= 9) switches on.
ggplot(dfgrow_MC_long, aes(x = Quarter, y = Growth_MC, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "Growthrate Market Cap", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("Market Capitalisation Growthrate per GICS Subindustry") +
  theme_minimal()
### very confusing, one can't really detect any patterns
### sharp decline only in 1 GICS subindustry: 45102020 --> Data Processing & Outsourced Services

# Same plot for revenue growth (axis label typo "Revenze" corrected).
ggplot(dfgrow_Rev_long , aes(x = Quarter, y = Growth_Rev, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "Growthrate Revenue", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("Revenue Growthrate per GICS Subindustry") +
  theme_minimal()
### first we test the concentration measures (controlled for Quarter) as a predictor on growth rates
# Subindustry-level growth rates regressed on one concentration measure at a
# time, with Quarter as a numeric control.
# first the 2 concentration measures for market cap
reg17a <- lm(Growth_MC ~ HHIMC + Quarter , data = dfgrow_MC_long)
#summary(reg17a)
### no significant effect of HHI (MC) on Market Cap growth rate when controlling for Quarter (same if we remove Quarter)
reg17b <- lm(Growth_MC ~ CR4MC + Quarter , data = dfgrow_MC_long)
#summary(reg17b)
# same when testing for CR4
# now for the 2 concentration measures for revenue
reg17c <- lm(Growth_Rev ~ HHIRev + Quarter , data = dfgrow_Rev_long)
#summary(reg17c)
### no significant effect
reg17d <- lm(Growth_Rev ~ CR4Rev + Quarter , data = dfgrow_Rev_long)
#summary(reg17d)
## also no significant coefficient
### at the subindustry market level there is no statistically significant effect
#stargazer(reg17a, reg17b, reg17c, reg17d, title = "Time-fixed Model Market Concentration Profit Margins Aggregate Market Level", type = "text")
# Now we test the treatment effect of the regulatory approach by including a dummy variable
# adding treatment variable: 1 from quarter 9 onwards, 0 for quarters 1-8
dfgrow_Rev_long$treatment <- ifelse(dfgrow_Rev_long$Quarter >= 9, 1, 0)
dfgrow_MC_long$treatment <- ifelse(dfgrow_MC_long$Quarter >= 9, 1, 0)
# 2 models to test the treatment effect while controlling for time
# NOTE(review): Quarter is numeric here, so it enters as a single linear trend
# rather than as one dummy per quarter.
#REV
reg18a <- lm(Growth_Rev ~ Quarter + treatment, data = dfgrow_Rev_long)
#MC
reg18b <- lm(Growth_MC ~ Quarter + treatment, data = dfgrow_MC_long)
#summary(reg18b)
#no significant treatment effect
# NOTE(review): the table title says "Profit Margins" although the dependent
# variables are growth rates - confirm and rename when the output is regenerated.
stargazer(reg18a, reg18b, title = "Time fixed effects Model Profit Margins Aggregate Market Level",type = "text")
Time fixed effects Model Profit Margins Aggregate Market Level
===========================================================
Dependent variable:
----------------------------
Growth_Rev Growth_MC
(1) (2)
-----------------------------------------------------------
Quarter -0.008 -0.016***
(0.009) (0.005)
treatment 0.059 -0.027
(0.078) (0.046)
Constant 0.108** 0.202***
(0.053) (0.031)
-----------------------------------------------------------
Observations 240 240
R2 0.003 0.177
Adjusted R2 -0.005 0.170
Residual Std. Error (df = 237) 0.300 0.178
F Statistic (df = 2; 237) 0.357 25.416***
===========================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# 2. Including an interaction term between treatment and market concentration,
# to test whether the treatment effect differs with the level of concentration.
# In each formula `x*treatment` expands to x + treatment + x:treatment, so the
# explicit main effects are redundant but harmless.
#HHI Rev
reg18c <- lm(Growth_Rev ~ HHIRev + Quarter + treatment + HHIRev*treatment, data = dfgrow_Rev_long)
#no significant results
#CR4 Rev
reg18d <- lm(Growth_Rev ~ CR4Rev + Quarter + treatment + CR4Rev*treatment, data = dfgrow_Rev_long)
#not significant
#HHI MC
reg18e <- lm(Growth_MC ~ HHIMC + Quarter + treatment + HHIMC*treatment, data = dfgrow_MC_long)
#no significant treatment effect
#CR4 MC (stray second "+" removed from the formula; the fitted model is unchanged)
reg18f <- lm(Growth_MC ~ CR4MC + Quarter + treatment + CR4MC*treatment, data = dfgrow_MC_long)
# NOTE(review): the title says "Profit Margins" although the dependent
# variables are growth rates - confirm and rename when the output is regenerated.
stargazer(reg18c, reg18d, reg18e, reg18f, title = "Time fixed effects Model Profit Margins Aggregate Market Level with interaction term",type = "text")
Time fixed effects Model Profit Margins Aggregate Market Level with interaction term
==================================================================
Dependent variable:
-----------------------------------
Growth_Rev Growth_MC
(1) (2) (3) (4)
------------------------------------------------------------------
HHIRev -0.049
(0.129)
CR4Rev -0.049
(0.113)
HHIMC 0.042
(0.067)
CR4MC 0.068
(0.066)
Quarter -0.007 -0.007 -0.016*** -0.016***
(0.009) (0.009) (0.005) (0.005)
treatment 0.077 0.118 -0.006 0.034
(0.088) (0.127) (0.051) (0.073)
HHIRev:treatment -0.082
(0.184)
CR4Rev:treatment -0.092
(0.156)
HHIMC:treatment -0.095
(0.098)
CR4MC:treatment -0.100
(0.091)
Constant 0.119* 0.139 0.193*** 0.162***
(0.061) (0.091) (0.035) (0.051)
------------------------------------------------------------------
Observations 240 240 240 240
R2 0.008 0.011 0.180 0.181
Adjusted R2 -0.009 -0.006 0.166 0.167
Residual Std. Error (df = 235) 0.300 0.300 0.178 0.178
F Statistic (df = 4; 235) 0.465 0.661 12.886*** 13.005***
==================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
For the firm level, the growth rates were calculated on the same basis (again excluding observations with missing revenue or revenue = 0). This time a two-way fixed effects model was employed, controlling for quarter and GICS subindustry. Only the market capitalization model shows a small positive treatment effect that is significant at the 5 percent level. When introducing an interaction term for the different market concentration measures, the interaction itself is never significant, but the treatment effect remains significant in the HHI MC model. However, the explanatory value is relatively low at 1.5 percent. For future research, it would be very interesting to build a more accurate model that incorporates all relevant predictors of growth rates identified by the literature. This would allow us to obtain a better understanding of the predictive power of market concentration and the new regulatory approach. That being said, there is no indication of a negative effect of the regulatory approach on firm growth rates, which is an important result in itself.
### we can also do that on firm level
# in order to do that we need to calculate the growth rate for revenue and market cap
# Transform df_wide into long format: one row per firm (Name) and quarter.
# Each family of quarterly columns is reshaped on its own and the numeric
# quarter index is recovered by stripping the column-name prefix.

# firm revenue
dflong1 <- df_wide %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "Rev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, Rev)
dflong1$Quarter <- as.numeric(gsub("Rev_Q", "", dflong1$Quarter))

# subindustry HHI (revenue)
dflong2 <- df_wide %>%
  pivot_longer(
    cols = starts_with("HHIRev_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIRev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIRev)
dflong2$Quarter <- as.numeric(gsub("HHIRev_SubInd_Q", "", dflong2$Quarter))

# subindustry CR4 (revenue)
# Selector spelled like the gsub() pattern ("Subind") so it does not depend on
# starts_with() being case-insensitive by default.
dflong3 <- df_wide %>%
  pivot_longer(
    cols = starts_with("CR4Rev_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4Rev"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4Rev)
dflong3$Quarter <- as.numeric(gsub("CR4Rev_Subind_Q", "", dflong3$Quarter))

# firm market cap
dflong4 <- df_wide %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, MC)
dflong4$Quarter <- as.numeric(gsub("MC_Q", "", dflong4$Quarter))

# subindustry HHI (market cap)
dflong5 <- df_wide %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, HHIMC)
dflong5$Quarter <- as.numeric(gsub("HHIMC_SubInd_Q", "", dflong5$Quarter))

# subindustry CR4 (market cap); same spelling fix as for the revenue CR4
dflong6 <- df_wide %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, CR4MC)
dflong6$Quarter <- as.numeric(gsub("CR4MC_Subind_Q", "", dflong6$Quarter))

# joining long data sets on firm, subindustry and quarter
dfgrowthrates <- left_join(dflong1, dflong2, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong3, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong4, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong5, by = c("Name", "GICS_SubInd", "Quarter")) %>%
  left_join(dflong6, by = c("Name", "GICS_SubInd", "Quarter"))
# Calculating growth rates of Rev and MC on firm level:
# (x_t - x_{t-1}) / x_{t-1}, computed per firm after sorting by quarter.
# The grouping is dropped again afterwards so later steps work on a plain table.
dfgrowthrates <- dfgrowthrates %>%
  arrange(Name, Quarter) %>%
  group_by(Name) %>%
  mutate(
    GrowthR_MC = (MC - lag(MC)) / lag(MC),
    GrowthR_Rev = (Rev - lag(Rev)) / lag(Rev)
  ) %>%
  ungroup()

# now we add the treatment: 1 from quarter 9 onwards, 0 before
dfgrowthrates$treatment <- ifelse(dfgrowthrates$Quarter >= 9, 1, 0)

# Again get rid of the infinite results (division by a lagged value of 0).
# Rows with NA/NaN growth are kept, exactly as before; lm() drops them itself.
# Note that a row is removed entirely if either growth rate is infinite.
dfgrowthrates <- dfgrowthrates[!is.infinite(dfgrowthrates$GrowthR_Rev), ]
dfgrowthrates <- dfgrowthrates[!is.infinite(dfgrowthrates$GrowthR_MC), ]
### now we can do our two way fixed effects model
# Firm-level growth rates regressed on subindustry, Quarter and the treatment
# dummy.
# NOTE(review): Quarter is numeric, so it enters as a linear trend rather than
# as one dummy per quarter.
# Rev
reg19a <- lm(GrowthR_Rev ~ GICS_SubInd + Quarter + treatment, data = dfgrowthrates)
#summary(reg19a)
#### no significance
#MC
reg19b <- lm(GrowthR_MC ~ GICS_SubInd + Quarter + treatment, data = dfgrowthrates)
#summary(reg19b)
# NOTE(review): the title contains typos ("effcts", "Treament") and says
# "Profit Margins" although the dependent variables are growth rates.
stargazer(reg19a, reg19b, title="Two-way fixed effcts Treament Profit Margins Firm Level", type = "text")
Two-way fixed effcts Treament Profit Margins Firm Level
========================================================================
Dependent variable:
----------------------------------------------------
GrowthR_Rev GrowthR_MC
(1) (2)
------------------------------------------------------------------------
GICS_SubInd45102010 0.205 0.054
(0.509) (0.040)
GICS_SubInd45102020 -0.068 0.075
(1.053) (0.074)
GICS_SubInd45102030 0.097 0.194***
(0.630) (0.053)
GICS_SubInd45103010 0.181 0.087**
(0.495) (0.039)
GICS_SubInd45103020 0.433 0.061
(0.575) (0.046)
GICS_SubInd45201020 0.276 0.084**
(0.493) (0.039)
GICS_SubInd45202030 0.119 0.053
(0.559) (0.044)
GICS_SubInd45203010 0.208 0.068*
(0.478) (0.038)
GICS_SubInd45203015 -0.043 0.090**
(0.480) (0.038)
GICS_SubInd45203020 -0.066 0.151***
(0.784) (0.058)
GICS_SubInd45203030 4.124*** 0.068
(0.727) (0.061)
GICS_SubInd45301010 0.094 0.190***
(0.578) (0.048)
GICS_SubInd45301020 0.350 0.138***
(0.494) (0.039)
GICS_SubInd50202010 1.240** 0.093**
(0.576) (0.044)
GICS_SubInd50203010 -0.054 0.040
(0.610) (0.045)
Quarter -0.025 -0.022***
(0.031) (0.003)
treatment 0.171 0.048**
(0.271) (0.023)
Constant 0.274 0.140***
(0.487) (0.038)
------------------------------------------------------------------------
Observations 12,916 13,027
R2 0.005 0.015
Adjusted R2 0.003 0.014
Residual Std. Error 7.733 (df = 12898) 0.660 (df = 13009)
F Statistic 3.508*** (df = 17; 12898) 11.802*** (df = 17; 13009)
========================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
### we get a positive treatment effect for MC
# Let's test for an interaction effect with market concentration to see whether
# the treatment effect varies with the level of concentration.
# In each formula `x*treatment` already expands to x + treatment + x:treatment.
#HHI Rev
reg19c <- lm(GrowthR_Rev ~ HHIRev + GICS_SubInd + Quarter + treatment + HHIRev*treatment, data = dfgrowthrates)
#summary(reg19c)
#CR4 Rev
reg19d <- lm(GrowthR_Rev ~ CR4Rev + GICS_SubInd + Quarter + treatment + CR4Rev*treatment, data = dfgrowthrates)
#summary(reg19d)
#HHIMC
reg19e <- lm(GrowthR_MC ~ HHIMC + GICS_SubInd + Quarter + treatment + HHIMC*treatment, data = dfgrowthrates)
#summary(reg19e)
#CR4 MC
reg19f <- lm(GrowthR_MC ~ CR4MC + GICS_SubInd + Quarter + treatment + CR4MC*treatment, data = dfgrowthrates)
#summary(reg19f)
# NOTE(review): the title says "Profit Margins" although the dependent
# variables are growth rates.
stargazer(reg19c, reg19d, reg19e, reg19f, title="Two-way fixed effcts with Interaction Term Profit Margins Firm Level", type = "text")
Two-way fixed effcts with Interaction Term Profit Margins Firm Level
=============================================================================================================================
Dependent variable:
---------------------------------------------------------------------------------------------------------
GrowthR_Rev GrowthR_MC
(1) (2) (3) (4)
-----------------------------------------------------------------------------------------------------------------------------
HHIRev -3.965
(3.604)
CR4Rev -0.560
(2.251)
HHIMC -0.330*
(0.195)
CR4MC -0.174
(0.152)
GICS_SubInd45102010 -0.644 0.255 -0.142 -0.073
(0.963) (1.468) (0.130) (0.119)
GICS_SubInd45102020 0.140 -0.047 -0.016 0.070
(1.071) (1.077) (0.094) (0.074)
GICS_SubInd45102030 1.289 0.126 0.028 0.127
(1.302) (0.631) (0.118) (0.080)
GICS_SubInd45103010 -0.689 0.244 -0.104 -0.022
(0.974) (1.534) (0.127) (0.104)
GICS_SubInd45103020 -0.010 0.493 -0.108 -0.009
(0.721) (0.874) (0.117) (0.078)
GICS_SubInd45201020 -0.307 0.307 -0.107 -0.027
(0.748) (0.915) (0.127) (0.106)
GICS_SubInd45202030 0.130 0.152 -0.076 0.003
(0.559) (0.627) (0.093) (0.063)
GICS_SubInd45203010 -0.498 0.263 -0.107 -0.027
(0.833) (1.011) (0.117) (0.092)
GICS_SubInd45203015 -0.767 -0.005 -0.102 -0.020
(0.846) (1.189) (0.127) (0.105)
GICS_SubInd45203020 1.130 -0.040 0.040 0.136**
(1.375) (0.795) (0.092) (0.060)
GICS_SubInd45203030 3.969*** 4.143*** -0.031 0.043
(0.743) (0.727) (0.088) (0.065)
GICS_SubInd45301010 -0.403 0.135 0.011 0.102
(0.753) (0.918) (0.123) (0.092)
GICS_SubInd45301020 -0.438 0.402 -0.036 0.052
(0.907) (1.275) (0.117) (0.086)
GICS_SubInd50202010 0.698 1.281 -0.077 0.019
(0.778) (0.928) (0.116) (0.079)
GICS_SubInd50203010 1.811 -0.051 0.125* 0.044
(1.882) (0.642) (0.069) (0.045)
Quarter -0.025 -0.026 -0.023*** -0.023***
(0.031) (0.032) (0.003) (0.003)
treatment 0.153 -0.353 0.054** 0.057
(0.294) (0.444) (0.026) (0.039)
HHIRev:treatment 0.137
(0.969)
CR4Rev:treatment 1.064
(0.718)
HHIMC:treatment 0.001
(0.066)
CR4MC:treatment 0.006
(0.059)
Constant 1.274 0.512 0.352** 0.314**
(1.054) (2.040) (0.138) (0.158)
-----------------------------------------------------------------------------------------------------------------------------
Observations 12,916 12,916 13,027 13,027
R2 0.005 0.005 0.015 0.015
Adjusted R2 0.003 0.003 0.014 0.014
Residual Std. Error 7.733 (df = 12896) 7.733 (df = 12896) 0.660 (df = 13007) 0.660 (df = 13007)
F Statistic 3.206*** (df = 19; 12896) 3.255*** (df = 19; 12896) 10.719*** (df = 19; 13007) 10.628*** (df = 19; 13007)
=============================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
### Market entry/exit is proxied by the change in the number of listed firms,
### which is read off the missing market-cap values
# Overview of missing values: take columns 4 to 19 of df_wide
df_na <- select(df_wide, 4:19)
# tally the NA entries of every selected column (named numeric vector)
na_counts <- vapply(df_na, function(column) sum(is.na(column)), numeric(1))
na_counts
MC_Q1 MC_Q2 MC_Q3 MC_Q4 MC_Q5 MC_Q6 MC_Q7 MC_Q8 MC_Q9 MC_Q10 MC_Q11
445 434 423 396 365 347 321 264 239 206 177
MC_Q12 MC_Q13 MC_Q14 MC_Q15 MC_Q16
158 131 105 79 32
#ok let's get the actual df with all relevant variables
# NOTE(review): columns are picked by position; confirm that 2:19 and 84:115
# still cover Name, GICS_SubInd, MC_Q*, HHIMC_SubInd_Q* and CR4MC_Subind_Q*,
# which are the columns used below.
df_NA <- df_wide %>% select(2:19, 84:115)

# converting it to long format: one row per firm and quarter
df_NA_long1 <- df_NA %>%
  pivot_longer(
    cols = starts_with("MC_Q"),
    names_to = "Quarter",
    values_to = "MC"
  ) %>%
  select(Name, GICS_SubInd, Quarter, MC)
df_NA_long1$Quarter <- as.numeric(gsub("MC_Q", "", df_NA_long1$Quarter))

# Number of firms with a missing market cap per subindustry and quarter.
# .groups = "drop" returns an ungrouped table and silences the regrouping
# message that summarize() emits otherwise.
df_NA_long1 <- df_NA_long1 %>%
  group_by(GICS_SubInd, Quarter) %>%
  summarize(Missing_MC = sum(is.na(MC)), .groups = "drop")

# creating df for HHI (MC): one row per subindustry, then long format
df_NA_long2 <- df_NA[!duplicated(df_NA[["GICS_SubInd"]]), ]
df_NA_long2 <- df_NA_long2 %>%
  pivot_longer(
    cols = starts_with("HHIMC_SubInd_Q"),
    names_to = "Quarter",
    values_to = "HHIMC"
  ) %>%
  select(GICS_SubInd, Quarter, HHIMC)
df_NA_long2$Quarter <- as.numeric(gsub("HHIMC_SubInd_Q", "", df_NA_long2$Quarter))

# getting CR4 (MC)
# Selector spelled like the gsub() pattern ("Subind") so it does not depend on
# starts_with() being case-insensitive by default.
df_NA_long3 <- df_NA[!duplicated(df_NA[["GICS_SubInd"]]), ]
df_NA_long3 <- df_NA_long3 %>%
  pivot_longer(
    cols = starts_with("CR4MC_Subind_Q"),
    names_to = "Quarter",
    values_to = "CR4MC"
  ) %>%
  select(GICS_SubInd, Quarter, CR4MC)
df_NA_long3$Quarter <- as.numeric(gsub("CR4MC_Subind_Q", "", df_NA_long3$Quarter))

### merging data sets
df_NA_long <- merge(
  merge(df_NA_long1, df_NA_long2, by = c("Quarter", "GICS_SubInd"), all = TRUE),
  df_NA_long3,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)
# Sort by quarter: lag() below works on row order within each subindustry.
df_NA_long <- df_NA_long %>% arrange(Quarter)

# Change in missing values by subindustry and quarter. A drop in the number of
# missing market caps is counted as firms entering, hence the sign flip.
# The first quarter of every subindustry has no predecessor and is NA.
df_NA_long <- df_NA_long %>%
  group_by(GICS_SubInd) %>%
  mutate(NewFirms = -1 * (Missing_MC - lag(Missing_MC))) %>%
  ungroup()
# Line chart of newly listed firms per quarter, one line per subindustry;
# the dotted vertical line marks quarter 8
ggplot(
  df_NA_long,
  aes(x = Quarter, y = NewFirms, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "Firms entering the Market",
    x = "Quarter",
    y = "New Firms",
    color = "GICS Subindustry"
  ) +
  theme_minimal()
# no real patterns detectable but there are some dropoffs around the cutoff
### regression time: first only looking at market cap concentration on new firms
# reg20a/reg20b use the concentration measure alone; reg20c/reg20d add Quarter
# as a numeric control.
reg20a <- lm(NewFirms ~ HHIMC, data = df_NA_long)
#summary(reg20a)
reg20b <- lm(NewFirms ~ CR4MC, data = df_NA_long)
#summary(reg20b)
#stargazer(reg20a, reg20b, title="Naive Regression Market Concentration New Firms", type = "text")
#the higher the HHI and market concentration, the lower the number of new (listed) firms entering the market
#-->highly significant effect
#-->adj r-squared at around 7 percent
#now checking for time-fixed effect
reg20c <- lm(NewFirms ~ HHIMC + Quarter, data = df_NA_long)
#summary(reg20c)
reg20d <- lm(NewFirms ~ CR4MC + Quarter, data = df_NA_long)
#summary(reg20d)
stargazer(reg20a, reg20b, reg20c, reg20d, title="Relationship Market Concentration New Firms", type = "text")
Relationship Market Concentration New Firms
===================================================================================================================
Dependent variable:
-----------------------------------------------------------------------------------------------
NewFirms
(1) (2) (3) (4)
-------------------------------------------------------------------------------------------------------------------
HHIMC -2.717*** -2.637***
(0.617) (0.617)
CR4MC -3.645*** -3.578***
(0.552) (0.552)
Quarter 0.057* 0.053
(0.034) (0.032)
Constant 2.307*** 3.880*** 1.773*** 3.367***
(0.198) (0.355) (0.370) (0.474)
-------------------------------------------------------------------------------------------------------------------
Observations 240 240 240 240
R2 0.075 0.155 0.086 0.164
Adjusted R2 0.071 0.151 0.079 0.157
Residual Std. Error 2.258 (df = 238) 2.159 (df = 238) 2.249 (df = 237) 2.151 (df = 237)
F Statistic 19.366*** (df = 1; 238) 43.602*** (df = 1; 238) 11.214*** (df = 2; 237) 23.285*** (df = 2; 237)
===================================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#now checking for treatment effect in a time fixed effects model
# treatment dummy: 1 from quarter 9 onwards, 0 before
df_NA_long$treatment <- ifelse(df_NA_long$Quarter >= 9, 1, 0)
reg21a <- lm(NewFirms ~ Quarter + treatment, data = df_NA_long)
#summary(reg21a)
# relevant positive treatment effect
# NOTE(review): the table printed below reports a negative treatment
# coefficient (-1.263) - re-check the wording "positive".
# NOTE(review): reg21b is an exact copy of reg21a (same formula, same data), so
# columns (1) and (2) of the table are identical - confirm what was intended.
reg21b <- lm(NewFirms ~ Quarter + treatment, data = df_NA_long)
# now we introduce market concentration as a control variable
reg21c <- lm(NewFirms ~ HHIMC + Quarter + treatment, data = df_NA_long)
#summary(reg21c)
# relevant positive treatment effect
reg21d <- lm(NewFirms ~ CR4MC + Quarter + treatment, data = df_NA_long)
#summary(reg21d)
stargazer(reg21a, reg21b, reg21c, reg21d, title="Time-fixed Regression Model with Concentration as Control", type = "text")
Time-fixed Regression Model with Concentration as Control
==============================================================================================================
Dependent variable:
------------------------------------------------------------------------------------------
NewFirms
(1) (2) (3) (4)
--------------------------------------------------------------------------------------------------------------
HHIMC -2.627***
(0.612)
CR4MC -3.531***
(0.549)
Quarter 0.195*** 0.195*** 0.182*** 0.164**
(0.069) (0.069) (0.067) (0.064)
treatment -1.263** -1.263** -1.244** -1.113**
(0.598) (0.598) (0.578) (0.554)
Constant 0.643 0.643 1.315*** 2.929***
(0.409) (0.409) (0.424) (0.519)
--------------------------------------------------------------------------------------------------------------
Observations 240 240 240 240
R2 0.034 0.034 0.104 0.178
Adjusted R2 0.026 0.026 0.093 0.168
Residual Std. Error 2.313 (df = 237) 2.313 (df = 237) 2.232 (df = 236) 2.138 (df = 236)
F Statistic 4.182** (df = 2; 237) 4.182** (df = 2; 237) 9.138*** (df = 3; 236) 17.069*** (df = 3; 236)
==============================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
In this section we do some robustness checks for our two models. In conclusion, we can establish that market concentration is a strong predictor of the number of new firms regardless of the model. The higher the market concentration, the fewer firms enter the market. Second, the plots and the different regressions point to a small positive treatment effect. However, the robustness checks show that the regression models are exposed to a high degree of heteroskedasticity. Neither logging the dependent variable nor using a higher polynomial for quarter fixes the issue. When trying to control for this with an interaction term between quarter and treatment, the interaction itself, but not the treatment effect, is significant. Ultimately, the results are inconclusive.
#first I remove all quarter 1 values because they all have NAs
df_NA_long <- df_NA_long %>%
  filter(Quarter != 1)

# Add predicted values to the data frame.
# The "HHI MC" and "CR4 MC" models are reg21c and reg21d. reg21a and reg21b
# contain only Quarter and treatment, so using them here produced the same
# concentration-free prediction under both labels.
# newdata = df_NA_long yields exactly one prediction per current row instead of
# relying on the fitted-value vector happening to have the same length.
df_NA_long$predictedHHI <- predict(reg21c, newdata = df_NA_long)
df_NA_long$predictedCR4 <- predict(reg21d, newdata = df_NA_long)

# Create line plot of actual and predicted values across quarters for HHI MC Model
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedHHI, color = "PredictedHHI")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedHHI" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = "Regression Model Performance HHI MC")

# Create line plot of actual and predicted values across quarters for CR4 MC Model
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedCR4, color = "PredictedCR4")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedCR4" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = "Regression Model Performance CR4 MC")
## that doesn't look too good
#### testing for Linearity
# The diagnostics below now run on the models that actually contain the
# concentration measure (reg21c = HHI MC, reg21d = CR4 MC). Before, they used
# reg21a and reg21b, which are the same Quarter + treatment model twice.
#HHI MC model
# Plotting fitted values against residuals
plot(reg21c, 1)
# spread of the residuals increases as the predicted values increase, thereby
# indicating that the variances of the residuals are not constant across the
# range of the data.
#CR4 MC model
# Plotting fitted values against residuals
plot(reg21d, 1)
#same problem applies
#testing for homoscedasticity
#HHI MC
# NOTE: any printed test result that follows and still names reg21a was
# produced before this correction and has to be regenerated.
bp_HHI <- bptest(reg21c)
bp_HHI
studentized Breusch-Pagan test
data: reg21a
BP = 10.348, df = 2, p-value = 0.005662
#based on the results, there is evidence of heteroscedasticity in the regression model
#CR4 MC
# Test the model that contains CR4MC (reg21d). reg21b has the same formula as
# reg21a (Quarter + treatment only), so testing it merely repeated the previous
# result.
# NOTE: any printed test result that follows and still names reg21b was
# produced before this correction and has to be regenerated.
bp_CR4 <- bptest(reg21d)
bp_CR4
studentized Breusch-Pagan test
data: reg21b
BP = 10.348, df = 2, p-value = 0.005662
## same here
### let's test different approaches to deal with heteroscedasticity
# first let's make Quarter not linear but quadratic
# Inside a model formula `^` means term crossing, so `Quarter^2` is the same
# term as `Quarter` and the model stayed linear. I() protects the arithmetic
# square. The linear term is kept so that Quarter enters as a full
# second-degree polynomial.
r1HHI <- lm(NewFirms ~ HHIMC + Quarter + I(Quarter^2) + treatment, data = df_NA_long)
r1CR4 <- lm(NewFirms ~ CR4MC + Quarter + I(Quarter^2) + treatment, data = df_NA_long)
#now redo the test
# NOTE: any printed result for r1HHI / r1CR4 that follows was produced with the
# linear specification and has to be regenerated.
bp_HHI_robust <- bptest(r1HHI)
bp_HHI_robust
studentized Breusch-Pagan test
data: r1HHI
BP = 13.508, df = 3, p-value = 0.003657
#has not changed anything
# Breusch-Pagan test for the CR4 model r1CR4; the second line prints the result.
bp_CR4_robust <- bptest(r1CR4)
bp_CR4_robust
studentized Breusch-Pagan test
data: r1CR4
BP = 23.112, df = 3, p-value = 3.827e-05
#doesn't work
### we can't log the dependent variable because there are some negative values
# Another option is to estimate different slopes for the treatment levels by
# adding an interaction term between the treatment variable and the Quarter
# variable. This allows the slope of Quarter to vary with the treatment level.
# (`Quarter*treatment` already expands to Quarter + treatment + Quarter:treatment.)
r2HHI <- lm(NewFirms ~ HHIMC + Quarter + treatment + Quarter*treatment, data = df_NA_long)
r2CR4 <- lm(NewFirms ~ CR4MC + Quarter + treatment + Quarter*treatment, data = df_NA_long)
summary(r2HHI)
Call:
lm(formula = NewFirms ~ HHIMC + Quarter + treatment + Quarter *
treatment, data = df_NA_long)
Residuals:
Min 1Q Median 3Q Max
-3.1186 -1.4453 -0.4713 0.7282 10.5423
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5012 0.5844 0.858 0.39196
HHIMC -2.6374 0.6081 -4.338 2.14e-05 ***
Quarter 0.3451 0.1048 3.292 0.00115 **
treatment 0.9314 1.2250 0.760 0.44783
Quarter:treatment -0.2720 0.1353 -2.010 0.04553 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.218 on 235 degrees of freedom
Multiple R-squared: 0.1192, Adjusted R-squared: 0.1042
F-statistic: 7.952 on 4 and 235 DF, p-value: 4.991e-06
# Coefficient table for the CR4 version of the Quarter x treatment model
summary(r2CR4)
Call:
lm(formula = NewFirms ~ CR4MC + Quarter + treatment + Quarter *
treatment, data = df_NA_long)
Residuals:
Min 1Q Median 3Q Max
-3.7361 -1.3411 -0.4142 0.6915 10.2725
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.0437 0.6333 3.227 0.00143 **
CR4MC -3.5946 0.5439 -6.608 2.58e-10 ***
Quarter 0.3488 0.1000 3.488 0.00058 ***
treatment 1.3606 1.1713 1.162 0.24659
Quarter:treatment -0.3088 0.1293 -2.389 0.01766 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.117 on 235 degrees of freedom
Multiple R-squared: 0.1978, Adjusted R-squared: 0.1841
F-statistic: 14.48 on 4 and 235 DF, p-value: 1.376e-10
#in both cases, the treatment effect is no longer significant. Based on the p-value for the treatment variable, it does not have a significant effect on the number of NewFirms. However, the interaction term "Quarter:treatment" is significant, which means that the effect of treatment on NewFirms depends on the quarter. Therefore, it is important to examine the coefficients for the treatment variable for each quarter separately to determine if there is a significant effect.
#first let's see if there is still evidence of heteroscedasticity in the regression model
# Fitted values of the HHI model with interaction term (r2HHI). Passing
# newdata keeps the predictions aligned with the rows of df_NA_long: rows with
# missing predictors get NA instead of causing a length mismatch.
df_NA_long$predictedHHInew <- predict(r2HHI, newdata = df_NA_long)

# Create line plot of actual and predicted values across quarters for new HHI MC Model
# (one line per GICS_SubInd; black = actual NewFirms, red = fitted values).
# Fixed: the red line now uses predictedHHInew, the column created above from
# r2HHI; before, it plotted the column predictedHHI, which is not produced by
# this model.
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedHHInew, color = "PredictedHHInew")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedHHInew" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = " HHI MC Model With interaction effect")

# Fitted values of the CR4 model with interaction term (r2CR4). This statement
# used to be fused onto the end of the labs() line, which is a syntax error.
df_NA_long$predictedCR4new <- predict(r2CR4, newdata = df_NA_long)
# Create line plot of actual and predicted values across quarters for new CR4 MC Model
# (one line per GICS_SubInd; black = actual NewFirms, red = fitted values).
# Fixed: the red line now uses predictedCR4new (fitted values of r2CR4); before,
# it plotted predictedHHI, i.e. the predictions of a different model.
ggplot(data = na.omit(df_NA_long), aes(x = Quarter, y = NewFirms, group = GICS_SubInd)) +
  geom_line(aes(color = "Actual")) +
  geom_line(aes(y = predictedCR4new, color = "PredictedCR4new")) +
  scale_color_manual(values = c("Actual" = "black", "PredictedCR4new" = "red")) +
  labs(x = "Quarter", y = "NewFirms", title = " CR4 MC Model With interaction effect")

# Breusch-Pagan test for heteroscedasticity on the HHI model WITH the
# interaction term. Fixed: the test was run on r1HHI, so it could not show any
# effect of adding the interaction. Any recorded output that reports
# "data: r1HHI" (df = 3) belongs to the old call and has to be regenerated;
# r2HHI has four regressors, so the test should report df = 4.
# This statement also used to be fused onto the end of the labs() line.
bp_r2HHI <- bptest(r2HHI)
bp_r2HHI
studentized Breusch-Pagan test
data: r1HHI
BP = 13.508, df = 3, p-value = 0.003657
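#NOTE: the output above is for r1HHI (see "data: r1HHI" and df = 3), i.e. a different model than r2HHI, so it cannot show whether the interaction term changed anything. The test has to be run on r2HHI (four regressors, so df = 4 is expected) before a conclusion about heteroscedasticity in the interaction model can be drawn.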
# Breusch-Pagan test for heteroscedasticity on the CR4 model with interaction
# term; the result is stored and then printed.
bp_r2CR4 <- bptest(r2CR4)
print(bp_r2CR4)
studentized Breusch-Pagan test
data: r2CR4
BP = 21.451, df = 4, p-value = 0.0002577
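#for the CR4 model the test still rejects homoscedasticity (p = 0.0003), so adding the interaction term did not remove the heteroscedasticity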
# Last try: recode the Quarter variable as a character variable so that lm()
# estimates a separate dummy, and a separate interaction with treatment, for
# each quarter.
# The labels have the form "Q<quarter number>". lm() turns a character
# predictor into a factor with alphabetically sorted levels, which is why the
# coefficient table lists Q11 ... Q16 before Q2 ... Q9.
df_NA_long[["Quarter_char"]] <- paste0("Q", df_NA_long[["Quarter"]])
# New regression with the interaction term based on the character quarter variable
r3HHI <- lm(
  formula = NewFirms ~ HHIMC + Quarter_char + treatment + Quarter_char*treatment,
  data = df_NA_long
)
summary(r3HHI)
Call:
lm(formula = NewFirms ~ HHIMC + Quarter_char + treatment + Quarter_char *
treatment, data = df_NA_long)
Residuals:
Min 1Q Median 3Q Max
-3.4946 -1.4450 -0.4942 0.8015 9.7198
Coefficients: (15 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.61529 0.56696 4.613 6.68e-06 ***
HHIMC -2.63380 0.60602 -4.346 2.10e-05 ***
Quarter_charQ11 -0.25682 0.78136 -0.329 0.7427
Quarter_charQ12 -0.87393 0.78136 -1.118 0.2646
Quarter_charQ13 -0.39976 0.78138 -0.512 0.6094
Quarter_charQ14 -0.49201 0.78146 -0.630 0.5296
Quarter_charQ15 -0.49192 0.78146 -0.629 0.5297
Quarter_charQ16 0.80910 0.78151 1.035 0.3016
Quarter_charQ2 -1.27138 0.78172 -1.626 0.1053
Quarter_charQ3 -1.30202 0.78154 -1.666 0.0971 .
Quarter_charQ4 -0.32089 0.78146 -0.411 0.6817
Quarter_charQ5 -0.07797 0.78144 -0.100 0.9206
Quarter_charQ6 -0.90025 0.78141 -1.152 0.2505
Quarter_charQ7 -0.38791 0.78144 -0.496 0.6201
Quarter_charQ8 1.53301 0.78140 1.962 0.0510 .
Quarter_charQ9 -0.45272 0.78144 -0.579 0.5629
treatment NA NA NA NA
Quarter_charQ11:treatment NA NA NA NA
Quarter_charQ12:treatment NA NA NA NA
Quarter_charQ13:treatment NA NA NA NA
Quarter_charQ14:treatment NA NA NA NA
Quarter_charQ15:treatment NA NA NA NA
Quarter_charQ16:treatment NA NA NA NA
Quarter_charQ2:treatment NA NA NA NA
Quarter_charQ3:treatment NA NA NA NA
Quarter_charQ4:treatment NA NA NA NA
Quarter_charQ5:treatment NA NA NA NA
Quarter_charQ6:treatment NA NA NA NA
Quarter_charQ7:treatment NA NA NA NA
Quarter_charQ8:treatment NA NA NA NA
Quarter_charQ9:treatment NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.21 on 224 degrees of freedom
Multiple R-squared: 0.1663, Adjusted R-squared: 0.1105
F-statistic: 2.979 on 15 and 224 DF, p-value: 0.000233
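# The NAs arise from perfect collinearity (see "15 not defined because of singularities"). When Quarter is converted to a character variable, each unique quarter value becomes its own level and gets its own dummy variable. The treatment column and every Quarter_char:treatment column can then be written as an exact linear combination of the columns already in the model, so lm() drops them. This is what happens when treatment only varies over time, i.e. has the same value for all observations within a quarter (presumably the case here; to be confirmed against how treatment was coded). The quarter dummies then already absorb any such effect, the model is overspecified, and a separate treatment effect per quarter cannot be identified from it.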
# Check whether the same problem occurs in the CR4 MC model: same specification
# as r3HHI, with CR4MC as the concentration measure instead of HHIMC.
r3CR4 <- lm(
  formula = NewFirms ~ CR4MC + Quarter_char + treatment + Quarter_char*treatment,
  data = df_NA_long
)
summary(r3CR4)
Call:
lm(formula = NewFirms ~ CR4MC + Quarter_char + treatment + Quarter_char *
treatment, data = df_NA_long)
Residuals:
Min 1Q Median 3Q Max
-4.0477 -1.2228 -0.3506 0.7151 9.4808
Coefficients: (15 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.2699 0.6233 6.850 7.04e-11 ***
CR4MC -3.5763 0.5411 -6.609 2.79e-10 ***
Quarter_charQ11 -0.2711 0.7443 -0.364 0.7161
Quarter_charQ12 -0.9593 0.7444 -1.289 0.1988
Quarter_charQ13 -0.5163 0.7446 -0.693 0.4888
Quarter_charQ14 -0.6495 0.7450 -0.872 0.3842
Quarter_charQ15 -0.6710 0.7451 -0.900 0.3688
Quarter_charQ16 0.5596 0.7458 0.750 0.4539
Quarter_charQ2 -1.4000 0.7443 -1.881 0.0613 .
Quarter_charQ3 -1.3990 0.7443 -1.880 0.0615 .
Quarter_charQ4 -0.4390 0.7444 -0.590 0.5559
Quarter_charQ5 -0.1757 0.7443 -0.236 0.8136
Quarter_charQ6 -0.9818 0.7443 -1.319 0.1885
Quarter_charQ7 -0.4794 0.7443 -0.644 0.5202
Quarter_charQ8 1.4240 0.7444 1.913 0.0570 .
Quarter_charQ9 -0.5016 0.7443 -0.674 0.5011
treatment NA NA NA NA
Quarter_charQ11:treatment NA NA NA NA
Quarter_charQ12:treatment NA NA NA NA
Quarter_charQ13:treatment NA NA NA NA
Quarter_charQ14:treatment NA NA NA NA
Quarter_charQ15:treatment NA NA NA NA
Quarter_charQ16:treatment NA NA NA NA
Quarter_charQ2:treatment NA NA NA NA
Quarter_charQ3:treatment NA NA NA NA
Quarter_charQ4:treatment NA NA NA NA
Quarter_charQ5:treatment NA NA NA NA
Quarter_charQ6:treatment NA NA NA NA
Quarter_charQ7:treatment NA NA NA NA
Quarter_charQ8:treatment NA NA NA NA
Quarter_charQ9:treatment NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.105 on 224 degrees of freedom
Multiple R-squared: 0.2435, Adjusted R-squared: 0.1928
F-statistic: 4.807 on 15 and 224 DF, p-value: 4.585e-08
#same issue
#In conclusion, market concentration is a strong predictor of the number of new firms regardless of the model: the higher the market concentration, the fewer firms enter the market. Second, the plots and the different regressions point to a small positive treatment effect. However, the regression model is exposed to multicollinearity. When trying to control for this with an interaction term, the interaction itself, but not the treatment effect, is significant. Ultimately, the results are inconclusive.